From a1de562f5c569cc85c2d2e3cddc82970f503b2d4 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Sat, 24 Dec 2022 14:36:57 +0000 Subject: First part of fix for [4a7397e0b3]: Take flags into account when deciding to do a binary fcopy or not. TODO: Handle generating an exception --- generic/tclIO.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index cfb97ec..98315db 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -9347,7 +9347,9 @@ TclCopyChannel( moveBytes = inStatePtr->inEofChar == '\0' /* No eofChar to stop input */ && inStatePtr->inputTranslation == TCL_TRANSLATE_LF && outStatePtr->outputTranslation == TCL_TRANSLATE_LF - && inStatePtr->encoding == outStatePtr->encoding; + && inStatePtr->encoding == outStatePtr->encoding + && (inStatePtr->flags & TCL_ENCODING_STRICT) != TCL_ENCODING_STRICT + && outStatePtr->flags & TCL_ENCODING_NOCOMPLAIN; /* * Allocate a new CopyState to maintain info about the current copy in @@ -9674,7 +9676,9 @@ CopyData( inBinary = (inStatePtr->encoding == NULL); outBinary = (outStatePtr->encoding == NULL); - sameEncoding = (inStatePtr->encoding == outStatePtr->encoding); + sameEncoding = inStatePtr->encoding == outStatePtr->encoding + && (inStatePtr->flags & TCL_ENCODING_STRICT) != TCL_ENCODING_STRICT + && outStatePtr->flags & TCL_ENCODING_NOCOMPLAIN; if (!(inBinary || sameEncoding)) { TclNewObj(bufObj); -- cgit v0.12 From 4a8b2cf10f8ebcebaa9d3546f3399d3d9a8aa00e Mon Sep 17 00:00:00 2001 From: pooryorick Date: Wed, 28 Dec 2022 12:07:18 +0000 Subject: A better fix for [b8f575aa23], as it maintains the expectation that synchronous [read] results in an error when invalid data is encountered. someone other than pooryorick: Pushed this check-in back on to a review branch. It needs more baking/review. As is, it makes two tests fail, and it introduces a new element "-result" to the return options dictionary. --- generic/tclIO.c | 13 +++++++++++-- generic/tclIOCmd.c | 10 +++++++++- tests/io.test | 29 ++++++++++++++++------------- 3 files changed, 36 insertions(+), 16 deletions(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index e6e3560..63b9a7d 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -6041,7 +6041,7 @@ DoReadChars( assert(statePtr->inputEncodingFlags & TCL_ENCODING_END); assert(!GotFlag(statePtr, CHANNEL_BLOCKED|INPUT_SAW_CR)); - /* TODO: We don't need this call? */ + /* TODO: UpdateInterest isn't needed here? */ UpdateInterest(chanPtr); return 0; } @@ -6055,7 +6055,7 @@ DoReadChars( } ResetFlag(statePtr, CHANNEL_BLOCKED|CHANNEL_EOF); statePtr->inputEncodingFlags &= ~TCL_ENCODING_END; - /* TODO: We don't need this call? */ + /* TODO: UpdateInterest isn't needed here? */ UpdateInterest(chanPtr); return 0; } @@ -6084,6 +6084,15 @@ DoReadChars( } else { copiedNow = ReadChars(statePtr, objPtr, toRead, &factor); } + if (GotFlag(statePtr, CHANNEL_ENCODING_ERROR) && !GotFlag(statePtr, CHANNEL_NONBLOCKING)) { + /* Channel is Synchronous. Return an error so that [read] and + * friends can return an error + */ + TclChannelRelease((Tcl_Channel)chanPtr); + UpdateInterest(chanPtr); + Tcl_SetErrno(EILSEQ); + return -1; + } /* * If the current buffer is empty recycle it. diff --git a/generic/tclIOCmd.c b/generic/tclIOCmd.c index e8a534f..8794365 100644 --- a/generic/tclIOCmd.c +++ b/generic/tclIOCmd.c @@ -381,7 +381,7 @@ Tcl_ReadObjCmd( int toRead; /* How many bytes to read? */ int charactersRead; /* How many characters were read? */ int mode; /* Mode in which channel is opened. */ - Tcl_Obj *resultPtr, *chanObjPtr; + Tcl_Obj *resultPtr, *returnOptsPtr, *chanObjPtr; if ((objc != 2) && (objc != 3)) { Interp *iPtr; @@ -470,8 +470,16 @@ Tcl_ReadObjCmd( "error reading \"%s\": %s", TclGetString(chanObjPtr), Tcl_PosixError(interp))); } + returnOptsPtr = Tcl_NewDictObj(); + Tcl_DictObjPut(NULL, returnOptsPtr, Tcl_NewStringObj("-code", -1) + , Tcl_NewStringObj("error", -1)); + Tcl_DictObjPut(NULL, returnOptsPtr, Tcl_NewStringObj("-level", -1) + , Tcl_NewIntObj(0)); + Tcl_DictObjPut(NULL, returnOptsPtr, Tcl_NewStringObj("-result", -1) + , resultPtr); TclChannelRelease(chan); Tcl_DecrRefCount(resultPtr); + Tcl_SetReturnOptions(interp, returnOptsPtr); return TCL_ERROR; } diff --git a/tests/io.test b/tests/io.test index d10e1e4..451a790 100644 --- a/tests/io.test +++ b/tests/io.test @@ -9056,12 +9056,12 @@ test io-75.6 {invalid utf-8 encoding read is not ignored (-strictencoding 1)} -s seek $f 0 fconfigure $f -encoding utf-8 -buffering none -eofchar "" -translation lf -strictencoding 1 } -body { - set d [read $f] + set status [catch {read $f} cres copts] + set d [dict get $copts -result] binary scan $d H* hd - lappend hd [catch {read $f} msg] - close $f - lappend hd $msg + lappend hd $status $cres } -cleanup { + close $f removeFile io-75.6 } -match glob -result {41 1 {error reading "*": illegal byte sequence}} @@ -9075,11 +9075,12 @@ test io-75.7 {invalid utf-8 encoding eof handling (-strictencoding 1)} -setup { seek $f 0 fconfigure $f -encoding utf-8 -buffering none -eofchar \x1A -translation lf -strictencoding 1 } -body { - set d [read $f] + set status [catch {read $f} cres copts] + set d [dict get $copts -result] binary scan $d H* hd lappend hd [eof $f] - lappend hd [catch {read $f} msg] - lappend hd $msg + lappend hd $status + lappend hd $cres fconfigure $f -encoding iso8859-1 lappend hd [read $f];# We changed encoding, so now we can read the \xA1 close $f @@ -9157,10 +9158,11 @@ test io-75.11 {shiftjis encoding error read results in raw bytes} -setup { seek $f 0 fconfigure $f -encoding shiftjis -buffering none -eofchar "" -translation lf -strictencoding 1 } -body { - set d [read $f] + set status [catch {read $f} cres copts] + set d [dict get $copts -result] binary scan $d H* hd - lappend hd [catch {set d [read $f]} msg] - lappend hd $msg + lappend hd $status + lappend hd $cres } -cleanup { close $f removeFile io-75.11 @@ -9192,11 +9194,12 @@ test io-75.13 {invalid utf-8 encoding read is not ignored (-strictencoding 1)} - seek $f 0 fconfigure $f -encoding utf-8 -buffering none -eofchar "" -translation lf -strictencoding 1 } -body { - set d [read $f] + set status [catch {read $f} cres copts] + set d [dict get $copts -result] binary scan $d H* hd - lappend hd [catch {read $f} msg] + lappend hd $status close $f - lappend hd $msg + lappend hd $cres } -cleanup { removeFile io-75.13 } -match glob -result {41 1 {error reading "*": illegal byte sequence}} -- cgit v0.12 From dbd4edc3d53208fc92252173a0714d8f6524d1a1 Mon Sep 17 00:00:00 2001 From: fvogel Date: Wed, 28 Dec 2022 20:39:17 +0000 Subject: Fix [8e811bc1f1]: Wrong formatting of arguments in man page --- tools/tcltk-man2html-utils.tcl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/tcltk-man2html-utils.tcl b/tools/tcltk-man2html-utils.tcl index 6e4f1fb..1e9e31e 100644 --- a/tools/tcltk-man2html-utils.tcl +++ b/tools/tcltk-man2html-utils.tcl @@ -583,6 +583,7 @@ proc output-IP-list {context code rest} { backup-text 1 set accept_RE 0 set para {} + set endpara {} while {[more-text]} { set line [next-text] if {[is-a-directive $line]} { @@ -594,7 +595,7 @@ proc output-IP-list {context code rest} { continue } if {$manual(section) eq "ARGUMENTS"} { - man-puts "$para
$rest
" + man-puts "$para
$rest
$endpara" } elseif {[regexp {^\[([\da-f]+)\]$} $rest -> value]} { man-puts "$para
  • " } elseif {[regexp {^\(?([\da-f]+)\)$} $rest -> value]} { @@ -664,6 +665,7 @@ proc output-IP-list {context code rest} { man-puts $line } set para

    + set endpara

    } man-puts "$para$enddl" lappend manual(section-toc) $enddl -- cgit v0.12 From f2cc84c99a732dbde0a6845d0809443e43276d17 Mon Sep 17 00:00:00 2001 From: pooryorick Date: Wed, 28 Dec 2022 22:46:37 +0000 Subject: Update fix so that the two failing tests, iocmd-23.8 and iortrans-4.7 now pass. --- generic/tclIOCmd.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/generic/tclIOCmd.c b/generic/tclIOCmd.c index 8794365..e5ba298 100644 --- a/generic/tclIOCmd.c +++ b/generic/tclIOCmd.c @@ -471,10 +471,6 @@ Tcl_ReadObjCmd( TclGetString(chanObjPtr), Tcl_PosixError(interp))); } returnOptsPtr = Tcl_NewDictObj(); - Tcl_DictObjPut(NULL, returnOptsPtr, Tcl_NewStringObj("-code", -1) - , Tcl_NewStringObj("error", -1)); - Tcl_DictObjPut(NULL, returnOptsPtr, Tcl_NewStringObj("-level", -1) - , Tcl_NewIntObj(0)); Tcl_DictObjPut(NULL, returnOptsPtr, Tcl_NewStringObj("-result", -1) , resultPtr); TclChannelRelease(chan); -- cgit v0.12 From 23d30205a5621acc748c0c3b6ad79a21bb80e327 Mon Sep 17 00:00:00 2001 From: fvogel Date: Thu, 29 Dec 2022 17:04:43 +0000 Subject: Instead of fiddling with html tags, simply enlarge a bit the width in the CSS stylesheet. --- tools/tcltk-man2html-utils.tcl | 4 +--- tools/tcltk-man2html.tcl | 4 ++-- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/tools/tcltk-man2html-utils.tcl b/tools/tcltk-man2html-utils.tcl index 1e9e31e..6e4f1fb 100644 --- a/tools/tcltk-man2html-utils.tcl +++ b/tools/tcltk-man2html-utils.tcl @@ -583,7 +583,6 @@ proc output-IP-list {context code rest} { backup-text 1 set accept_RE 0 set para {} - set endpara {} while {[more-text]} { set line [next-text] if {[is-a-directive $line]} { @@ -595,7 +594,7 @@ proc output-IP-list {context code rest} { continue } if {$manual(section) eq "ARGUMENTS"} { - man-puts "$para
    $rest
    $endpara" + man-puts "$para
    $rest
    " } elseif {[regexp {^\[([\da-f]+)\]$} $rest -> value]} { man-puts "$para
  • " } elseif {[regexp {^\(?([\da-f]+)\)$} $rest -> value]} { @@ -665,7 +664,6 @@ proc output-IP-list {context code rest} { man-puts $line } set para

    - set endpara

    } man-puts "$para$enddl" lappend manual(section-toc) $enddl diff --git a/tools/tcltk-man2html.tcl b/tools/tcltk-man2html.tcl index 236a49f..caececa 100755 --- a/tools/tcltk-man2html.tcl +++ b/tools/tcltk-man2html.tcl @@ -221,14 +221,14 @@ proc css-stylesheet {} { font-size: 11px; } css-style ".keylist dt" ".arguments dt" { - width: 20em; + width: 25em; float: left; padding: 2px; border-top: 1px solid #999999; } css-style ".keylist dt" { font-weight: bold; } css-style ".keylist dd" ".arguments dd" { - margin-left: 20em; + margin-left: 25em; padding: 2px; border-top: 1px solid #999999; } -- cgit v0.12 From 3b45005127de0885251471d5591ecb58c5b3e286 Mon Sep 17 00:00:00 2001 From: pooryorick Date: Thu, 29 Dec 2022 22:59:10 +0000 Subject: Arrange new code in DoReadChars to ensure that final steps are always taken. --- generic/tclIO.c | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index 63b9a7d..9ae8fb5 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -6024,8 +6024,9 @@ DoReadChars( } if (GotFlag(statePtr, CHANNEL_ENCODING_ERROR)) { - /* TODO: We don't need this call? */ + /* TODO: UpdateInterest not needed here? */ UpdateInterest(chanPtr); + Tcl_SetErrno(EILSEQ); return -1; } @@ -6041,7 +6042,7 @@ DoReadChars( assert(statePtr->inputEncodingFlags & TCL_ENCODING_END); assert(!GotFlag(statePtr, CHANNEL_BLOCKED|INPUT_SAW_CR)); - /* TODO: UpdateInterest isn't needed here? */ + /* TODO: UpdateInterest not needed here? */ UpdateInterest(chanPtr); return 0; } @@ -6055,7 +6056,7 @@ DoReadChars( } ResetFlag(statePtr, CHANNEL_BLOCKED|CHANNEL_EOF); statePtr->inputEncodingFlags &= ~TCL_ENCODING_END; - /* TODO: UpdateInterest isn't needed here? */ + /* TODO: UpdateInterest not needed here? */ UpdateInterest(chanPtr); return 0; } @@ -6084,18 +6085,9 @@ DoReadChars( } else { copiedNow = ReadChars(statePtr, objPtr, toRead, &factor); } - if (GotFlag(statePtr, CHANNEL_ENCODING_ERROR) && !GotFlag(statePtr, CHANNEL_NONBLOCKING)) { - /* Channel is Synchronous. Return an error so that [read] and - * friends can return an error - */ - TclChannelRelease((Tcl_Channel)chanPtr); - UpdateInterest(chanPtr); - Tcl_SetErrno(EILSEQ); - return -1; - } /* - * If the current buffer is empty recycle it. + * Recycle current buffer if empty. */ bufPtr = statePtr->inQueueHead; @@ -6108,6 +6100,15 @@ DoReadChars( statePtr->inQueueTail = NULL; } } + + if (GotFlag(statePtr, CHANNEL_ENCODING_ERROR) && !GotFlag(statePtr, CHANNEL_NONBLOCKING)) { + /* Channel is synchronous. Return an error so that callers + * like [read] can return an error. + */ + Tcl_SetErrno(EILSEQ); + copied = -1; + goto finish; + } } if (copiedNow < 0) { @@ -6136,6 +6137,7 @@ DoReadChars( } } +finish: /* * Failure to fill a channel buffer may have left channel reporting a * "blocked" state, but so long as we fulfilled the request here, the -- cgit v0.12 From a801c2d4741015dbb5875938248eff1701e1ff29 Mon Sep 17 00:00:00 2001 From: pooryorick Date: Fri, 30 Dec 2022 20:27:47 +0000 Subject: Fix DoReadChars() to correctly discard encoding errors after eofchar has been seen, and add new test, io-75.8.invalid. --- generic/tclEncoding.c | 7 ++++++- generic/tclIO.c | 16 ++++++++++++++-- tests/io.test | 36 ++++++++++++++++++++++++++++++++++-- 3 files changed, 54 insertions(+), 5 deletions(-) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index d10d9ca..37b3073 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -2386,7 +2386,12 @@ UtfToUtfProc( *dst++ = *src++; } else if ((UCHAR(*src) == 0xC0) && (src + 1 < srcEnd) - && (UCHAR(src[1]) == 0x80) && (!(flags & TCL_ENCODING_MODIFIED) || ((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT))) { + && (UCHAR(src[1]) == 0x80) + && ( + !(flags & TCL_ENCODING_MODIFIED) + || ((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT) + )) + { /* * If in input mode, and -strict is specified: This is an error. */ diff --git a/generic/tclIO.c b/generic/tclIO.c index 9ae8fb5..3b47de5 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -6101,7 +6101,16 @@ DoReadChars( } } - if (GotFlag(statePtr, CHANNEL_ENCODING_ERROR) && !GotFlag(statePtr, CHANNEL_NONBLOCKING)) { + /* + * If CHANNEL_ENCODING_ERROR and CHANNEL_STICKY_EOF are both set, + * then CHANNEL_ENCODING_ERROR was caused by data that occurred + * after the EOF character was encountered, so it doesn't count as + * a real error. + */ + + if (GotFlag(statePtr, CHANNEL_ENCODING_ERROR) + && !GotFlag(statePtr, CHANNEL_STICKY_EOF) + && !GotFlag(statePtr, CHANNEL_NONBLOCKING)) { /* Channel is synchronous. Return an error so that callers * like [read] can return an error. */ @@ -6816,11 +6825,14 @@ TranslateInputEOL( * EOF character was seen in EOL translated range. Leave current file * position pointing at the EOF character, but don't store the EOF * character in the output string. + * + * If CHANNEL_ENCODING_ERROR is set, it can only be because of data + * encountered after the EOF character, so it is nonsense. Unset it. */ SetFlag(statePtr, CHANNEL_EOF | CHANNEL_STICKY_EOF); statePtr->inputEncodingFlags |= TCL_ENCODING_END; - ResetFlag(statePtr, CHANNEL_BLOCKED|INPUT_SAW_CR); + ResetFlag(statePtr, CHANNEL_BLOCKED|INPUT_SAW_CR|CHANNEL_ENCODING_ERROR); } } diff --git a/tests/io.test b/tests/io.test index 451a790..aece338 100644 --- a/tests/io.test +++ b/tests/io.test @@ -9089,11 +9089,15 @@ test io-75.7 {invalid utf-8 encoding eof handling (-strictencoding 1)} -setup { removeFile io-75.7 } -match glob -result {41 0 1 {error reading "*": illegal byte sequence} ¡} -test io-75.8 {invalid utf-8 encoding eof handling (-strictencoding 1)} -setup { +test io-75.8.incomplete { + incomplete uft-8 char after eof char is not an error (-strictencoding 1) +} -setup { + set hd {} set fn [makeFile {} io-75.8] set f [open $fn w+] fconfigure $f -encoding binary - # \x81 is invalid in utf-8, but since \x1A comes first, -eofchar takes precedence. + # \x81 is invalid and also incomplete utf-8 data, but because the eof + # character \x1A appears first, it's not an error. puts -nonewline $f A\x1A\x81 flush $f seek $f 0 @@ -9102,6 +9106,7 @@ test io-75.8 {invalid utf-8 encoding eof handling (-strictencoding 1)} -setup { set d [read $f] binary scan $d H* hd lappend hd [eof $f] + # there should be no error on additional reads lappend hd [read $f] close $f set hd @@ -9109,6 +9114,33 @@ test io-75.8 {invalid utf-8 encoding eof handling (-strictencoding 1)} -setup { removeFile io-75.8 } -result {41 1 {}} + +test io-75.8.invalid {invalid utf-8 after eof char is not an error (-strictencoding 1)} -setup { + set res {} + set fn [makeFile {} io-75.8] + set f [open $fn w+] + fconfigure $f -encoding binary + # \xc0\x80 is invalid utf-8 data, but because the eof character \x1A + # appears first, it's not an error. + puts -nonewline $f A\x1a\xc0\x80 + flush $f + seek $f 0 + fconfigure $f -encoding utf-8 -buffering none -eofchar \x1A -translation lf -strictencoding 1 +} -body { + set d [read $f] + foreach char [split $d {}] { + lappend res [format %x [scan $char %c]] + } + lappend res [eof $f] + # there should be no error on additional reads + lappend res [read $f] + close $f + set res +} -cleanup { + removeFile io-75.8 +} -result {41 1 {}} + + test io-75.9 {unrepresentable character write passes and is replaced by ?} -setup { set fn [makeFile {} io-75.9] set f [open $fn w+] -- cgit v0.12 From 63e04b3c2dc7ecaf014a93f2116b5913a256e875 Mon Sep 17 00:00:00 2001 From: pooryorick Date: Fri, 30 Dec 2022 21:05:56 +0000 Subject: New test, io-12.9.strict, for issue report [1bedc53c8cb878f0]. --- tests/io.test | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/tests/io.test b/tests/io.test index aece338..6fb3587 100644 --- a/tests/io.test +++ b/tests/io.test @@ -1555,11 +1555,29 @@ test io-12.9 {ReadChars: multibyte chars split} -body { set f [open $path(test1)] fconfigure $f -encoding utf-8 -buffersize 10 set in [read $f] + read $f close $f scan [string index $in end] %c } -cleanup { catch {close $f} } -result 194 +test io-12.9.strict {ReadChars: multibyte chars split} -body { + set res {} + set f [open $path(test1) w] + fconfigure $f -translation binary + puts -nonewline $f [string repeat a 9]\xC2 + close $f + set f [open $path(test1)] + fconfigure $f -encoding utf-8 -strictencoding 1 -buffersize 10 + set status [catch {read $f} cres copts] + set in [dict get $copts -result] + lappend res $in + lappend res $status $cres + set res +} -cleanup { + close $f + catch {close $f} +} -match glob -result {aaaaaaaaa 1 {error reading "*": illegal byte sequence}} test io-12.10 {ReadChars: multibyte chars split} -body { set f [open $path(test1) w] fconfigure $f -translation binary -- cgit v0.12 From 3919b0a0b4e371b574d16adaa1c73df6da8007ce Mon Sep 17 00:00:00 2001 From: pooryorick Date: Fri, 30 Dec 2022 21:53:47 +0000 Subject: Add test for [gets] in non-strict mode after an encoding error. --- tests/io.test | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/tests/io.test b/tests/io.test index 6fb3587..2fa06ea 100644 --- a/tests/io.test +++ b/tests/io.test @@ -9254,6 +9254,28 @@ test io-75.13 {invalid utf-8 encoding read is not ignored (-strictencoding 1)} - removeFile io-75.13 } -match glob -result {41 1 {error reading "*": illegal byte sequence}} +test io-75.14 {invalid utf-8 encoding [gets] coninues in non-strict mode after error} -setup { + set fn [makeFile {} io-75.14] + set f [open $fn w+] + fconfigure $f -encoding binary + # \xc0 is invalid in utf-8 + puts -nonewline $f a\nb\xc0\nc\n + flush $f + seek $f 0 + fconfigure $f -encoding utf-8 -buffering none -eofchar {} -translation lf -strictencoding 1 +} -body { + lappend res [gets $f] + set status [catch {gets $f} cres copts] + lappend res $status $cres + chan configure $f -strictencoding 0 + lappend res [gets $f] + lappend res [gets $f] + close $f + return $res +} -cleanup { + removeFile io-75.14 +} -match glob -result {a 1 {error reading "*": illegal byte sequence} bÀ c} + # ### ### ### ######### ######### ######### -- cgit v0.12 From 985ea00b16865c0dccc99eb9b006f97e8e59edb0 Mon Sep 17 00:00:00 2001 From: pooryorick Date: Mon, 2 Jan 2023 23:12:02 +0000 Subject: Merge py-b8f575aa23: Fix for [154ed7ce56], Tcl 9: [gets] on -strictencoding 1 configured channel. --- generic/tclIO.c | 28 ++++++++++++++++++++++++++-- generic/tclIOCmd.c | 7 +++++-- tests/io.test | 32 ++++++++++++++++++++++++++++++-- 3 files changed, 61 insertions(+), 6 deletions(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index 3b47de5..81af96e 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -4656,7 +4656,8 @@ Tcl_GetsObj( /* State info for channel */ ChannelBuffer *bufPtr; int inEofChar, skip, copiedTotal, oldFlags, oldRemoved; - int oldLength; + int reportError = 0; + size_t oldLength; Tcl_Encoding encoding; char *dst, *dstEnd, *eol, *eof; Tcl_EncodingState oldState; @@ -4664,6 +4665,7 @@ Tcl_GetsObj( if (GotFlag(statePtr, CHANNEL_ENCODING_ERROR)) { UpdateInterest(chanPtr); Tcl_SetErrno(EILSEQ); + ResetFlag(statePtr, CHANNEL_ENCODING_ERROR); return TCL_INDEX_NONE; } @@ -4938,6 +4940,19 @@ Tcl_GetsObj( goto done; } goto gotEOL; + } else if (gs.bytesWrote == 0 + && GotFlag(statePtr, CHANNEL_ENCODING_ERROR)) { + /* Set eol to the position that caused the encoding error, and then + * coninue to gotEOL, which stores the data that was decoded + * without error to objPtr. This allows the caller to do something + * useful with the data decoded so far, and also results in the + * position of the file being the first byte that was not + * succesfully decoded, allowing further processing at exactly that + * point, if desired. + */ + eol = dstEnd; + reportError = 1; + goto gotEOL; } dst = dstEnd; } @@ -4981,7 +4996,16 @@ Tcl_GetsObj( Tcl_SetObjLength(objPtr, eol - objPtr->bytes); CommonGetsCleanup(chanPtr); ResetFlag(statePtr, CHANNEL_BLOCKED); - copiedTotal = gs.totalChars + gs.charsWrote - skip; + if (reportError) { + ResetFlag(statePtr, CHANNEL_BLOCKED|INPUT_SAW_CR|CHANNEL_ENCODING_ERROR); + /* reset CHANNEL_ENCODING_ERROR to afford a chance to reconfigure + * the channel and try again + */ + Tcl_SetErrno(EILSEQ); + copiedTotal = -1; + } else { + copiedTotal = gs.totalChars + gs.charsWrote - skip; + } goto done; /* diff --git a/generic/tclIOCmd.c b/generic/tclIOCmd.c index e5ba298..bc52b8e 100644 --- a/generic/tclIOCmd.c +++ b/generic/tclIOCmd.c @@ -295,7 +295,7 @@ Tcl_GetsObjCmd( Tcl_Channel chan; /* The channel to read from. */ int lineLen; /* Length of line just read. */ int mode; /* Mode in which channel is opened. */ - Tcl_Obj *linePtr, *chanObjPtr; + Tcl_Obj *linePtr, *chanObjPtr, *returnOptsPtr; int code = TCL_OK; if ((objc != 2) && (objc != 3)) { @@ -318,7 +318,6 @@ Tcl_GetsObjCmd( lineLen = Tcl_GetsObj(chan, linePtr); if (lineLen < 0) { if (!Tcl_Eof(chan) && !Tcl_InputBlocked(chan)) { - Tcl_DecrRefCount(linePtr); /* * TIP #219. @@ -332,7 +331,11 @@ Tcl_GetsObjCmd( "error reading \"%s\": %s", TclGetString(chanObjPtr), Tcl_PosixError(interp))); } + returnOptsPtr = Tcl_NewDictObj(); + Tcl_DictObjPut(NULL, returnOptsPtr, Tcl_NewStringObj("-result", -1) + , linePtr); code = TCL_ERROR; + Tcl_SetReturnOptions(interp, returnOptsPtr); goto done; } lineLen = TCL_INDEX_NONE; diff --git a/tests/io.test b/tests/io.test index 2fa06ea..854759e 100644 --- a/tests/io.test +++ b/tests/io.test @@ -9255,6 +9255,7 @@ test io-75.13 {invalid utf-8 encoding read is not ignored (-strictencoding 1)} - } -match glob -result {41 1 {error reading "*": illegal byte sequence}} test io-75.14 {invalid utf-8 encoding [gets] coninues in non-strict mode after error} -setup { + set res {} set fn [makeFile {} io-75.14] set f [open $fn w+] fconfigure $f -encoding binary @@ -9271,13 +9272,40 @@ test io-75.14 {invalid utf-8 encoding [gets] coninues in non-strict mode after e lappend res [gets $f] lappend res [gets $f] close $f - return $res + return $res } -cleanup { removeFile io-75.14 } -match glob -result {a 1 {error reading "*": illegal byte sequence} bÀ c} -# ### ### ### ######### ######### ######### +test io-75.15 {invalid utf-8 encoding strict gets should not hang} -setup { + set res {} + set fn [makeFile {} io-75.15] + set chan [open $fn w+] + fconfigure $chan -encoding binary + # This is not valid UTF-8 + puts $chan hello\nAB\xc0\x40CD\nEFG + close $chan +} -body { + #Now try to read it with [gets] + set chan [open $fn] + fconfigure $chan -encoding utf-8 -strictencoding 1 + lappend res [gets $chan] + set status [catch {gets $chan} cres copts] + lappend res $status $cres + set status [catch {gets $chan} cres copts] + lappend res $status $cres + lappend res [dict get $copts -result] + chan configur $chan -encoding binary + foreach char [split [read $chan 2] {}] { + lappend res [format %x [scan $char %c]] + } + return $res +} -cleanup { + close $chan + removeFile io-75.15 +} -match glob -result {hello 1 {error reading "*": illegal byte sequence}\ + 1 {error reading "*": illegal byte sequence} AB c0 40} test io-76.0 {channel modes} -setup { -- cgit v0.12 From ce9fb3bcfb5727f83db328e2ee54bc6b56c6e7ea Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Wed, 4 Jan 2023 23:21:16 +0000 Subject: Fix [0f19edcb78]: Windows 11 not reported in tcl_platform(osVersion) --- unix/tclUnixInit.c | 3 +++ win/tclWinInit.c | 3 +++ 2 files changed, 6 insertions(+) diff --git a/unix/tclUnixInit.c b/unix/tclUnixInit.c index 7467938..47b8df3 100644 --- a/unix/tclUnixInit.c +++ b/unix/tclUnixInit.c @@ -895,6 +895,9 @@ TclpSetVariables( GetSystemInfo(&sysInfo); + if (osInfo.dwMajorVersion == 10 && osInfo.dwBuildNumber >= 22000) { + osInfo.dwMajorVersion = 11; + } Tcl_SetVar2(interp, "tcl_platform", "os", "Windows NT", TCL_GLOBAL_ONLY); sprintf(buffer, "%d.%d", osInfo.dwMajorVersion, osInfo.dwMinorVersion); Tcl_SetVar2(interp, "tcl_platform", "osVersion", buffer, TCL_GLOBAL_ONLY); diff --git a/win/tclWinInit.c b/win/tclWinInit.c index eae4404..582c700 100644 --- a/win/tclWinInit.c +++ b/win/tclWinInit.c @@ -552,6 +552,9 @@ TclpSetVariables( TCL_GLOBAL_ONLY); Tcl_SetVar2(interp, "tcl_platform", "os", "Windows NT", TCL_GLOBAL_ONLY); + if (osInfo.dwMajorVersion == 10 && osInfo.dwBuildNumber >= 22000) { + osInfo.dwMajorVersion = 11; + } wsprintfA(buffer, "%d.%d", osInfo.dwMajorVersion, osInfo.dwMinorVersion); Tcl_SetVar2(interp, "tcl_platform", "osVersion", buffer, TCL_GLOBAL_ONLY); if (sys.oemId.wProcessorArchitecture < NUMPROCESSORS) { -- cgit v0.12 From 72eb4e42290dccb6db60a66a085c355fb1e779e9 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Fri, 6 Jan 2023 16:56:14 +0000 Subject: Fix [ad393071c2]: Use different LD_LIBRARY_PATH_VAR on macOS --- unix/configure | 2 +- unix/tcl.m4 | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/unix/configure b/unix/configure index 8981ef8..94ecfc6 100755 --- a/unix/configure +++ b/unix/configure @@ -7898,7 +7898,7 @@ fi CC_SEARCH_FLAGS="" LD_SEARCH_FLAGS="" - LD_LIBRARY_PATH_VAR="DYLD_LIBRARY_PATH" + LD_LIBRARY_PATH_VAR="DYLD_FALLBACK_LIBRARY_PATH" cat >>confdefs.h <<\_ACEOF #define MAC_OSX_TCL 1 diff --git a/unix/tcl.m4 b/unix/tcl.m4 index 6063847..6cee92c 100644 --- a/unix/tcl.m4 +++ b/unix/tcl.m4 @@ -1594,7 +1594,7 @@ AC_DEFUN([SC_CONFIG_CFLAGS], [ ]) CC_SEARCH_FLAGS="" LD_SEARCH_FLAGS="" - LD_LIBRARY_PATH_VAR="DYLD_LIBRARY_PATH" + LD_LIBRARY_PATH_VAR="DYLD_FALLBACK_LIBRARY_PATH" AC_DEFINE(MAC_OSX_TCL, 1, [Is this a Mac I see before me?]) PLAT_OBJS='${MAC_OSX_OBJS}' PLAT_SRCS='${MAC_OSX_SRCS}' -- cgit v0.12 From 805fa175fc88005a9955a6202f05d17b91b70c19 Mon Sep 17 00:00:00 2001 From: pooryorick Date: Sun, 8 Jan 2023 10:07:46 +0000 Subject: For [read] and [gets] encoding errors, use "-result read" in return options dictionary instead of just "-result". --- generic/tclIOCmd.c | 14 ++++-- tests/io.test | 141 ++++++++++++++++++++++++++++++++++++----------------- 2 files changed, 106 insertions(+), 49 deletions(-) diff --git a/generic/tclIOCmd.c b/generic/tclIOCmd.c index bc52b8e..2eeb04c 100644 --- a/generic/tclIOCmd.c +++ b/generic/tclIOCmd.c @@ -295,7 +295,7 @@ Tcl_GetsObjCmd( Tcl_Channel chan; /* The channel to read from. */ int lineLen; /* Length of line just read. */ int mode; /* Mode in which channel is opened. */ - Tcl_Obj *linePtr, *chanObjPtr, *returnOptsPtr; + Tcl_Obj *linePtr, *chanObjPtr, *resultDictPtr, *returnOptsPtr; int code = TCL_OK; if ((objc != 2) && (objc != 3)) { @@ -331,9 +331,12 @@ Tcl_GetsObjCmd( "error reading \"%s\": %s", TclGetString(chanObjPtr), Tcl_PosixError(interp))); } + resultDictPtr = Tcl_NewDictObj(); + Tcl_DictObjPut(NULL, resultDictPtr, Tcl_NewStringObj("read", -1) + , linePtr); returnOptsPtr = Tcl_NewDictObj(); Tcl_DictObjPut(NULL, returnOptsPtr, Tcl_NewStringObj("-result", -1) - , linePtr); + , resultDictPtr); code = TCL_ERROR; Tcl_SetReturnOptions(interp, returnOptsPtr); goto done; @@ -384,7 +387,7 @@ Tcl_ReadObjCmd( int toRead; /* How many bytes to read? */ int charactersRead; /* How many characters were read? */ int mode; /* Mode in which channel is opened. */ - Tcl_Obj *resultPtr, *returnOptsPtr, *chanObjPtr; + Tcl_Obj *resultPtr, *resultDictPtr, *returnOptsPtr, *chanObjPtr; if ((objc != 2) && (objc != 3)) { Interp *iPtr; @@ -473,9 +476,12 @@ Tcl_ReadObjCmd( "error reading \"%s\": %s", TclGetString(chanObjPtr), Tcl_PosixError(interp))); } + resultDictPtr = Tcl_NewDictObj(); + Tcl_DictObjPut(NULL, resultDictPtr, Tcl_NewStringObj("read", -1) + , resultPtr); returnOptsPtr = Tcl_NewDictObj(); Tcl_DictObjPut(NULL, returnOptsPtr, Tcl_NewStringObj("-result", -1) - , resultPtr); + , resultDictPtr); TclChannelRelease(chan); Tcl_DecrRefCount(resultPtr); Tcl_SetReturnOptions(interp, returnOptsPtr); diff --git a/tests/io.test b/tests/io.test index 854759e..3f00561 100644 --- a/tests/io.test +++ b/tests/io.test @@ -1547,37 +1547,43 @@ test io-12.8 {ReadChars: multibyte chars split} { close $f scan [string index $in end] %c } 160 -test io-12.9 {ReadChars: multibyte chars split} -body { - set f [open $path(test1) w] - fconfigure $f -translation binary - puts -nonewline $f [string repeat a 9]\xC2 - close $f - set f [open $path(test1)] - fconfigure $f -encoding utf-8 -buffersize 10 - set in [read $f] - read $f - close $f - scan [string index $in end] %c -} -cleanup { - catch {close $f} -} -result 194 -test io-12.9.strict {ReadChars: multibyte chars split} -body { - set res {} - set f [open $path(test1) w] - fconfigure $f -translation binary - puts -nonewline $f [string repeat a 9]\xC2 - close $f - set f [open $path(test1)] - fconfigure $f -encoding utf-8 -strictencoding 1 -buffersize 10 - set status [catch {read $f} cres copts] - set in [dict get $copts -result] - lappend res $in - lappend res $status $cres - set res -} -cleanup { - close $f - catch {close $f} -} -match glob -result {aaaaaaaaa 1 {error reading "*": illegal byte sequence}} + + +apply [list {} { + set template { + test io-12.9.@variant@ {ReadChars: multibyte chars split, default (strict)} -body { + set res {} + set f [open $path(test1) w] + fconfigure $f -translation binary + puts -nonewline $f [string repeat a 9]\xC2 + close $f + set f [open $path(test1)] + fconfigure $f -encoding utf-8 @strict@ -buffersize 10 + set status [catch {read $f} cres copts] + set in [dict get $copts -result] + lappend res $in + lappend res $status $cres + set status [catch {read $f} cres copts] + set in [dict get $copts -result] + lappend res $in + lappend res $status $cres + set res + } -cleanup { + catch {close $f} + } -match glob -result {{read aaaaaaaaa} 1\ + {error reading "*": illegal byte sequence}\ + {read {}} 1 {error reading "*": illegal byte sequence}} + } + + # strict encoding may be the default in Tcl 9, but in 8 it is not + foreach variant {encodingstrict} strict {{-strictencoding 1}} { + set script [string map [ + list @variant@ $variant @strict@ $strict] $template] + uplevel 1 $script + } +} [namespace current]] + + test io-12.10 {ReadChars: multibyte chars split} -body { set f [open $path(test1) w] fconfigure $f -translation binary @@ -9075,7 +9081,7 @@ test io-75.6 {invalid utf-8 encoding read is not ignored (-strictencoding 1)} -s fconfigure $f -encoding utf-8 -buffering none -eofchar "" -translation lf -strictencoding 1 } -body { set status [catch {read $f} cres copts] - set d [dict get $copts -result] + set d [dict get $copts -result read] binary scan $d H* hd lappend hd $status $cres } -cleanup { @@ -9094,7 +9100,7 @@ test io-75.7 {invalid utf-8 encoding eof handling (-strictencoding 1)} -setup { fconfigure $f -encoding utf-8 -buffering none -eofchar \x1A -translation lf -strictencoding 1 } -body { set status [catch {read $f} cres copts] - set d [dict get $copts -result] + set d [dict get $copts -result read] binary scan $d H* hd lappend hd [eof $f] lappend hd $status @@ -9173,9 +9179,7 @@ test io-75.9 {unrepresentable character write passes and is replaced by ?} -setu removeFile io-75.9 } -match glob -result [list {A} {error writing "*": illegal byte sequence}] -# Incomplete sequence test. -# This error may IMHO only be detected with the close. -# But the read already returns the incomplete sequence. + test io-75.10 {incomplete multibyte encoding read is ignored} -setup { set fn [makeFile {} io-75.10] set f [open $fn w+] @@ -9183,7 +9187,7 @@ test io-75.10 {incomplete multibyte encoding read is ignored} -setup { puts -nonewline $f A\xC0 flush $f seek $f 0 - fconfigure $f -encoding utf-8 -buffering none + fconfigure $f -encoding utf-8 -strictencoding 0 -buffering none } -body { set d [read $f] close $f @@ -9192,8 +9196,32 @@ test io-75.10 {incomplete multibyte encoding read is ignored} -setup { } -cleanup { removeFile io-75.10 } -result 41c0 -# The current result returns the orphan byte as byte. -# This may be expected due to special utf-8 handling. + + +test io-75.10_strict {incomplete multibyte encoding read is an error} -setup { + set res {} + set fn [makeFile {} io-75.10] + set f [open $fn w+] + fconfigure $f -encoding binary + puts -nonewline $f A\xC0 + flush $f + seek $f 0 + fconfigure $f -encoding utf-8 -strictencoding 1 -buffering none +} -body { + set status [catch {read $f} cres copts] + set d [dict get $copts -result read] + binary scan $d H* hd + lappend res $hd $cres + chan configure $f -encoding iso8859-1 + set d [read $f] + binary scan $d H* hd + lappend res $hd + close $f + return $res +} -cleanup { + removeFile io-75.10 +} -match glob -result {41 {error reading "*": illegal byte sequence} c0} + # As utf-8 has a special treatment in multi-byte decoding, also test another # one. @@ -9206,10 +9234,11 @@ test io-75.11 {shiftjis encoding error read results in raw bytes} -setup { puts -nonewline $f A\x81\xFFA flush $f seek $f 0 - fconfigure $f -encoding shiftjis -buffering none -eofchar "" -translation lf -strictencoding 1 + fconfigure $f -encoding shiftjis -buffering none -eofchar "" \ + -translation lf -strictencoding 1 } -body { set status [catch {read $f} cres copts] - set d [dict get $copts -result] + set d [dict get $copts -result read] binary scan $d H* hd lappend hd $status lappend hd $cres @@ -9218,14 +9247,36 @@ test io-75.11 {shiftjis encoding error read results in raw bytes} -setup { removeFile io-75.11 } -match glob -result {41 1 {error reading "*": illegal byte sequence}} -test io-75.12 {invalid utf-8 encoding read is ignored} -setup { + +test io-75.12 {invalid utf-8 encoding read is an error} -setup { + set res {} + set fn [makeFile {} io-75.12] + set f [open $fn w+] + fconfigure $f -encoding binary + puts -nonewline $f A\x81 + flush $f + seek $f 0 + fconfigure $f -encoding utf-8 -buffering none -eofchar {} -translation lf \ + -strictencoding 1 +} -body { + set status [catch {read $f} cres copts] + set d [dict get $copts -result read] + close $f + binary scan $d H* hd + lappend res $hd $status $cres + return $res +} -cleanup { + removeFile io-75.12 +} -match glob -result {41 1 {error reading "*": illegal byte sequence}} +test io-75.12_ignore {invalid utf-8 encoding read is ignored} -setup { set fn [makeFile {} io-75.12] set f [open $fn w+] fconfigure $f -encoding binary puts -nonewline $f A\x81 flush $f seek $f 0 - fconfigure $f -encoding utf-8 -buffering none -eofchar "" -translation lf + fconfigure $f -encoding utf-8 -buffering none -eofchar {} \ + -translation lf -strictencoding 0 } -body { set d [read $f] close $f @@ -9245,7 +9296,7 @@ test io-75.13 {invalid utf-8 encoding read is not ignored (-strictencoding 1)} - fconfigure $f -encoding utf-8 -buffering none -eofchar "" -translation lf -strictencoding 1 } -body { set status [catch {read $f} cres copts] - set d [dict get $copts -result] + set d [dict get $copts -result read] binary scan $d H* hd lappend hd $status close $f @@ -9305,7 +9356,7 @@ test io-75.15 {invalid utf-8 encoding strict gets should not hang} -setup { close $chan removeFile io-75.15 } -match glob -result {hello 1 {error reading "*": illegal byte sequence}\ - 1 {error reading "*": illegal byte sequence} AB c0 40} + 1 {error reading "*": illegal byte sequence} {read AB} c0 40} test io-76.0 {channel modes} -setup { -- cgit v0.12 From 33f4149ee57d7c60a267c0f72bec5dabba389613 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Sun, 8 Jan 2023 22:47:45 +0000 Subject: Fix for [https://core.tcl-lang.org/tk/tktview?name=370b1ff03e|370b1ff03e]. Not complete/correct yet, since this backouts the fix for [4dbfa46caa] --- generic/tclEncoding.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index d10d9ca..cfad548 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -2409,7 +2409,7 @@ UtfToUtfProc( */ if (flags & TCL_ENCODING_MODIFIED) { - if ((STOPONERROR) && (flags & TCL_ENCODING_CHAR_LIMIT)) { + if (STOPONERROR) { result = TCL_CONVERT_MULTIBYTE; break; } @@ -3199,7 +3199,7 @@ TableFromUtfProc( word = fromUnicode[(ch >> 8)][ch & 0xFF]; if ((word == 0) && (ch != 0)) { - if ((STOPONERROR) && (flags & TCL_ENCODING_CHAR_LIMIT)) { + if (STOPONERROR) { result = TCL_CONVERT_UNKNOWN; break; } -- cgit v0.12 From 24656c280590bbc66e98685342c461af58f478a1 Mon Sep 17 00:00:00 2001 From: pooryorick Date: Sat, 14 Jan 2023 22:07:21 +0000 Subject: Properly quote contents of Make variables to pass through gdb.run file. --- unix/Makefile.in | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/unix/Makefile.in b/unix/Makefile.in index eac47a6..21d4085 100644 --- a/unix/Makefile.in +++ b/unix/Makefile.in @@ -270,6 +270,8 @@ VALGRINDARGS = --tool=memcheck --num-callers=24 \ --keep-debuginfo=yes \ --suppressions=$(TOOL_DIR)/valgrind_suppress +shquotequote = $(subst ",\",$(subst ',\',$(1))) +shquotesingle = $(subst ','\'',$(1)) #-------------------------------------------------------------------------- # The information below should be usable as is. The configure script won't # modify it and you shouldn't need to modify it either. @@ -816,9 +818,12 @@ test-tcl: ${TCLTEST_EXE} $(SHELL_ENV) ./${TCLTEST_EXE} $(TOP_DIR)/tests/all.tcl $(TESTFLAGS) gdb-test: ${TCLTEST_EXE} - @echo "set env @LD_LIBRARY_PATH_VAR@=`pwd`:$${@LD_LIBRARY_PATH_VAR@}" > gdb.run - @echo "set env TCL_LIBRARY=${TCL_BUILDTIME_LIBRARY}" >> gdb.run - @echo "set args $(TOP_DIR)/tests/all.tcl $(TESTFLAGS) -singleproc 1" >> gdb.run + @printf '%s ' set env @LD_LIBRARY_PATH_VAR@=\"`pwd`$${@LD_LIBRARY_PATH_VAR@:+:$${@LD_LIBRARY_PATH_VAR}}\" > gdb.run + @printf '\n' >>gdb.run + @printf '%s ' set env TCL_LIBRARY=\'$(call shquotesingle,${TCL_BUILDTIME_LIBRARY})\' >> gdb.run + @printf '\n' >>gdb.run + @printf '%s ' set args $(call shquotequote,$(TOP_DIR))/tests/all.tcl\ + $(call shquotequote,$(TESTFLAGS)) -singleproc 1 >> gdb.run $(GDB) ./${TCLTEST_EXE} --command=gdb.run rm gdb.run -- cgit v0.12 From 5aef206d2d6f165c0997b8050cf92c5645afb8b4 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Sat, 14 Jan 2023 22:20:53 +0000 Subject: Proposed fix for [a31caff057]: encoding command does not allow -strict to be used with -failindex --- doc/encoding.n | 13 ++++++------- generic/tclCmdAH.c | 6 +++--- tests/cmdAH.test | 28 ++++++++++++++-------------- tests/encoding.test | 4 ++-- tests/safe.test | 8 ++++---- 5 files changed, 29 insertions(+), 30 deletions(-) diff --git a/doc/encoding.n b/doc/encoding.n index 78580f2..7eae61a 100644 --- a/doc/encoding.n +++ b/doc/encoding.n @@ -28,7 +28,7 @@ formats. Performs one of several encoding related operations, depending on \fIoption\fR. The legal \fIoption\fRs are: .TP -\fBencoding convertfrom\fR ?\fB-nocomplain\fR? ?\fB-failindex var\fR? ?\fB-strict\fR? ?\fIencoding\fR? \fIdata\fR +\fBencoding convertfrom\fR ?\fB-nocomplain\fR|\fB-strict\fR|\fB-failindex var\fR? ?\fIencoding\fR? \fIdata\fR . Convert \fIdata\fR to a Unicode string from the specified \fIencoding\fR. The characters in \fIdata\fR are 8 bit binary data. The resulting @@ -48,19 +48,19 @@ in case of a conversion error, the position of the input byte causing the error is returned in the given variable. The return value of the command are the converted characters until the first error position. In case of no error, the value \fI-1\fR is written to the variable. This option -may not be used together with \fB-nocomplain\fR. +may not be used together with \fB-nocomplain\fR, and it already implies \fB-strict\fR. .PP The option \fB-nocomplain\fR has no effect and is available for compatibility with TCL 9. In TCL 9, the encoding command fails with an error on any encoding issue. This switch restores the TCL8.7 behaviour. .PP The \fB-strict\fR option follows more strict rules in conversion. For the \fButf-8\fR -encoder, it disallows the the sequence \fB\\xC0\\x80\fR and noncharacters (which - +encoder, it disallows invalid byte sequences and surrogates (which - otherwise - are just passed through). .VE "TCL8.7 TIP346, TIP607, TIP601" .RE .TP -\fBencoding convertto\fR ?\fB-nocomplain\fR? ?\fB-failindex var\fR? ?\fB-strict\fR? ?\fIencoding\fR? \fIstring\fR +\fBencoding convertto\fR ?\fB-nocomplain\fR|\fB-strict\fR|\fB-failindex var\fR? ?\fIencoding\fR? \fIstring\fR . Convert \fIstring\fR from Unicode to the specified \fIencoding\fR. The result is a sequence of bytes that represents the converted @@ -81,15 +81,14 @@ in case of a conversion error, the position of the input character causing the e is returned in the given variable. The return value of the command are the converted bytes until the first error position. No error condition is raised. In case of no error, the value \fI-1\fR is written to the variable. This option -may not be used together with \fB-nocomplain\fR. +may not be used together with \fB-nocomplain\fR, and it already implies \fB-strict\fR. .PP The option \fB-nocomplain\fR has no effect and is available for compatibility with TCL 9. In TCL 9, the encoding command fails with an error on any encoding issue. This switch restores the TCL8.7 behaviour. .PP The \fB-strict\fR option follows more strict rules in conversion. For the \fButf-8\fR -encoder, it disallows the the sequence \fB\\xC0\\x80\fR and noncharacters (which - -otherwise - are just passed through). +encoder, it disallows surrogates (which - otherwise - are just passed through). .VE "TCL8.7 TIP346, TIP607, TIP601" .RE .TP diff --git a/generic/tclCmdAH.c b/generic/tclCmdAH.c index b4084d1..8f1bf1d 100644 --- a/generic/tclCmdAH.c +++ b/generic/tclCmdAH.c @@ -593,7 +593,7 @@ EncodingConvertfromObjCmd( goto encConvFromError; } failVarObj = objv[2]; - flags = TCL_ENCODING_STOPONERROR; + flags = TCL_ENCODING_STRICT; objcUnprocessed -= 2; } switch (objcUnprocessed) { @@ -610,7 +610,7 @@ EncodingConvertfromObjCmd( } } else { encConvFromError: - Tcl_WrongNumArgs(interp, 1, objv, "?-nocomplain? ?-strict? ?-failindex var? ?encoding? data"); + Tcl_WrongNumArgs(interp, 1, objv, "?-nocomplain|-strict|-failindex var? ?encoding? data"); return TCL_ERROR; } @@ -749,7 +749,7 @@ EncodingConverttoObjCmd( } } else { encConvToError: - Tcl_WrongNumArgs(interp, 1, objv, "?-nocomplain? ?-strict? ?-failindex var? ?encoding? data"); + Tcl_WrongNumArgs(interp, 1, objv, "?-nocomplain|-strict|-failindex var? ?encoding? data"); return TCL_ERROR; } diff --git a/tests/cmdAH.test b/tests/cmdAH.test index cb7e1cf..3533cb6 100644 --- a/tests/cmdAH.test +++ b/tests/cmdAH.test @@ -179,7 +179,7 @@ test cmdAH-4.2 {Tcl_EncodingObjCmd} -returnCodes error -body { } -result {unknown or ambiguous subcommand "foo": must be convertfrom, convertto, dirs, names, or system} test cmdAH-4.3 {Tcl_EncodingObjCmd} -returnCodes error -body { encoding convertto -} -result {wrong # args: should be "encoding convertto ?-nocomplain? ?-strict? ?-failindex var? ?encoding? data"} +} -result {wrong # args: should be "encoding convertto ?-nocomplain|-strict|-failindex var? ?encoding? data"} test cmdAH-4.4 {Tcl_EncodingObjCmd} -returnCodes error -body { encoding convertto foo bar } -result {unknown encoding "foo"} @@ -201,7 +201,7 @@ test cmdAH-4.6 {Tcl_EncodingObjCmd} -setup { } -result 8C test cmdAH-4.7 {Tcl_EncodingObjCmd} -returnCodes error -body { encoding convertfrom -} -result {wrong # args: should be "encoding convertfrom ?-nocomplain? ?-strict? ?-failindex var? ?encoding? data"} +} -result {wrong # args: should be "encoding convertfrom ?-nocomplain|-strict|-failindex var? ?encoding? data"} test cmdAH-4.8 {Tcl_EncodingObjCmd} -returnCodes error -body { encoding convertfrom foo bar } -result {unknown encoding "foo"} @@ -238,10 +238,10 @@ test cmdAH-4.13 {Tcl_EncodingObjCmd} -setup { test cmdAH-4.14.1 {Syntax error, -nocomplain and -failindex, no encoding} -body { encoding convertfrom -nocomplain -failindex 2 ABC -} -returnCodes 1 -result {wrong # args: should be "encoding convertfrom ?-nocomplain? ?-strict? ?-failindex var? ?encoding? data"} +} -returnCodes 1 -result {wrong # args: should be "encoding convertfrom ?-nocomplain|-strict|-failindex var? ?encoding? data"} test cmdAH-4.14.2 {Syntax error, -nocomplain and -failindex, no encoding} -body { encoding convertto -nocomplain -failindex 2 ABC -} -returnCodes 1 -result {wrong # args: should be "encoding convertto ?-nocomplain? ?-strict? ?-failindex var? ?encoding? data"} +} -returnCodes 1 -result {wrong # args: should be "encoding convertto ?-nocomplain|-strict|-failindex var? ?encoding? data"} test cmdAH-4.15.1 {Syntax error, -failindex and -nocomplain, no encoding} -body { encoding convertfrom -failindex 2 -nocomplain ABC } -returnCodes 1 -result {unknown encoding "-nocomplain"} @@ -250,19 +250,19 @@ test cmdAH-4.15.2 {Syntax error, -failindex and -nocomplain, no encoding} -body } -returnCodes 1 -result {unknown encoding "-nocomplain"} test cmdAH-4.16.1 {Syntax error, -nocomplain and -failindex, encoding} -body { encoding convertfrom -nocomplain -failindex 2 utf-8 ABC -} -returnCodes 1 -result {wrong # args: should be "encoding convertfrom ?-nocomplain? ?-strict? ?-failindex var? ?encoding? data"} +} -returnCodes 1 -result {wrong # args: should be "encoding convertfrom ?-nocomplain|-strict|-failindex var? ?encoding? data"} test cmdAH-4.16.2 {Syntax error, -nocomplain and -failindex, encoding} -body { encoding convertto -nocomplain -failindex 2 utf-8 ABC -} -returnCodes 1 -result {wrong # args: should be "encoding convertto ?-nocomplain? ?-strict? ?-failindex var? ?encoding? data"} +} -returnCodes 1 -result {wrong # args: should be "encoding convertto ?-nocomplain|-strict|-failindex var? ?encoding? data"} test cmdAH-4.17.1 {Syntax error, -failindex and -nocomplain, encoding} -body { encoding convertfrom -failindex 2 -nocomplain utf-8 ABC -} -returnCodes 1 -result {wrong # args: should be "encoding convertfrom ?-nocomplain? ?-strict? ?-failindex var? ?encoding? data"} +} -returnCodes 1 -result {wrong # args: should be "encoding convertfrom ?-nocomplain|-strict|-failindex var? ?encoding? data"} test cmdAH-4.17.2 {Syntax error, -failindex and -nocomplain, encoding} -body { encoding convertto -failindex 2 -nocomplain utf-8 ABC -} -returnCodes 1 -result {wrong # args: should be "encoding convertto ?-nocomplain? ?-strict? ?-failindex var? ?encoding? data"} +} -returnCodes 1 -result {wrong # args: should be "encoding convertto ?-nocomplain|-strict|-failindex var? ?encoding? data"} test cmdAH-4.18.1 {Syntax error, -failindex with no var, no encoding} -body { encoding convertfrom -failindex ABC -} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertfrom ?-nocomplain? ?-strict? ?-failindex var? ?encoding? data"} +} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertfrom ?-nocomplain|-strict|-failindex var? ?encoding? data"} test cmdAH-4.18.2 {Syntax error, -failindex with no var, no encoding (byte compiled)} -setup { proc encoding_test {} { encoding convertfrom -failindex ABC @@ -270,12 +270,12 @@ test cmdAH-4.18.2 {Syntax error, -failindex with no var, no encoding (byte compi } -body { # Compile and execute encoding_test -} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertfrom ?-nocomplain? ?-strict? ?-failindex var? ?encoding? data"} -cleanup { +} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertfrom ?-nocomplain|-strict|-failindex var? ?encoding? data"} -cleanup { rename encoding_test "" } test cmdAH-4.18.3 {Syntax error, -failindex with no var, no encoding} -body { encoding convertto -failindex ABC -} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertto ?-nocomplain? ?-strict? ?-failindex var? ?encoding? data"} +} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertto ?-nocomplain|-strict|-failindex var? ?encoding? data"} test cmdAH-4.18.4 {Syntax error, -failindex with no var, no encoding (byte compiled)} -setup { proc encoding_test {} { encoding convertto -failindex ABC @@ -283,7 +283,7 @@ test cmdAH-4.18.4 {Syntax error, -failindex with no var, no encoding (byte compi } -body { # Compile and execute encoding_test -} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertto ?-nocomplain? ?-strict? ?-failindex var? ?encoding? data"} -cleanup { +} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertto ?-nocomplain|-strict|-failindex var? ?encoding? data"} -cleanup { rename encoding_test "" } test cmdAH-4.19.1 {convertrom -failindex with correct data} -body { @@ -320,7 +320,7 @@ test cmdAH-4.20.1 {convertrom -failindex with incomplete utf8} -body { set x [encoding convertfrom -failindex i utf-8 A\xc3] binary scan $x H* y list $y $i -} -returnCodes 0 -result {41c3 -1} +} -returnCodes 0 -result {41 1} test cmdAH-4.20.2 {convertrom -failindex with incomplete utf8 (byte compiled)} -setup { proc encoding_test {} { set x [encoding convertfrom -failindex i utf-8 A\xc3] @@ -330,7 +330,7 @@ test cmdAH-4.20.2 {convertrom -failindex with incomplete utf8 (byte compiled)} - } -body { # Compile and execute encoding_test -} -returnCodes 0 -result {41c3 -1} -cleanup { +} -returnCodes 0 -result {41 1} -cleanup { rename encoding_test "" } test cmdAH-4.21.1 {convertto -failindex with wrong character} -body { diff --git a/tests/encoding.test b/tests/encoding.test index 5fd4e8c..a1d129e 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -681,10 +681,10 @@ test encoding-24.21 {Parse with -nocomplain but without providing encoding} { } 1 test encoding-24.22 {Syntax error, two encodings} -body { encoding convertfrom iso8859-1 utf-8 "ZX\uD800" -} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertfrom ?-nocomplain? ?-strict? ?-failindex var? ?encoding? data"} +} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertfrom ?-nocomplain|-strict|-failindex var? ?encoding? data"} test encoding-24.23 {Syntax error, two encodings} -body { encoding convertto iso8859-1 utf-8 "ZX\uD800" -} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertto ?-nocomplain? ?-strict? ?-failindex var? ?encoding? data"} +} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertto ?-nocomplain|-strict|-failindex var? ?encoding? data"} test encoding-24.24 {Parse invalid utf-8 with -strict} -body { encoding convertfrom -strict utf-8 "\xC0\x80\x00\x00" } -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xC0'} diff --git a/tests/safe.test b/tests/safe.test index e5d4d18..f2c0862 100644 --- a/tests/safe.test +++ b/tests/safe.test @@ -1473,7 +1473,7 @@ test safe-11.7 {testing safe encoding} -setup { interp eval $i encoding convertfrom } -returnCodes error -cleanup { safe::interpDelete $i -} -result {wrong # args: should be "encoding convertfrom ?-nocomplain? ?-strict? ?-failindex var? ?encoding? data"} +} -result {wrong # args: should be "encoding convertfrom ?-nocomplain|-strict|-failindex var? ?encoding? data"} test safe-11.7.1 {testing safe encoding} -setup { set i [safe::interpCreate] } -body { @@ -1482,7 +1482,7 @@ test safe-11.7.1 {testing safe encoding} -setup { } -match glob -cleanup { unset -nocomplain m o safe::interpDelete $i -} -result {wrong # args: should be "encoding convertfrom ?-nocomplain? ?-strict? ?-failindex var? ?encoding? data" +} -result {wrong # args: should be "encoding convertfrom ?-nocomplain|-strict|-failindex var? ?encoding? data" while executing "encoding convertfrom" invoked from within @@ -1495,7 +1495,7 @@ test safe-11.8 {testing safe encoding} -setup { interp eval $i encoding convertto } -returnCodes error -cleanup { safe::interpDelete $i -} -result {wrong # args: should be "encoding convertto ?-nocomplain? ?-strict? ?-failindex var? ?encoding? data"} +} -result {wrong # args: should be "encoding convertto ?-nocomplain|-strict|-failindex var? ?encoding? data"} test safe-11.8.1 {testing safe encoding} -setup { set i [safe::interpCreate] } -body { @@ -1504,7 +1504,7 @@ test safe-11.8.1 {testing safe encoding} -setup { } -match glob -cleanup { unset -nocomplain m o safe::interpDelete $i -} -result {wrong # args: should be "encoding convertto ?-nocomplain? ?-strict? ?-failindex var? ?encoding? data" +} -result {wrong # args: should be "encoding convertto ?-nocomplain|-strict|-failindex var? ?encoding? data" while executing "encoding convertto" invoked from within -- cgit v0.12 From 25f683758268f3059f49d51f7ae24c3cf0c1d316 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Sat, 14 Jan 2023 22:57:15 +0000 Subject: Same change for "encoding convertto" --- generic/tclCmdAH.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/generic/tclCmdAH.c b/generic/tclCmdAH.c index 8f1bf1d..016bd02 100644 --- a/generic/tclCmdAH.c +++ b/generic/tclCmdAH.c @@ -732,7 +732,7 @@ EncodingConverttoObjCmd( goto encConvToError; } failVarObj = objv[2]; - flags = TCL_ENCODING_STOPONERROR; + flags = TCL_ENCODING_STRICT; objcUnprocessed -= 2; } switch (objcUnprocessed) { -- cgit v0.12 From 81262438a784ae0087c36fabd189c15a2433df33 Mon Sep 17 00:00:00 2001 From: pooryorick Date: Sun, 15 Jan 2023 19:26:36 +0000 Subject: Fix issue [8f7fdea2d], string-2.20.1 fails on big endian, and also fix issues in TclStringCmp when checkEq is 1. --- generic/tclCmdMZ.c | 38 ++++++++++++++++++++++++-------------- tests/stringComp.test | 6 +++--- 2 files changed, 27 insertions(+), 17 deletions(-) diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c index 53e12c5..a97f309 100644 --- a/generic/tclCmdMZ.c +++ b/generic/tclCmdMZ.c @@ -2629,7 +2629,7 @@ StringEqualCmd( */ objv += objc-2; - match = TclStringCmp(objv[0], objv[1], 0, nocase, reqlength); + match = TclStringCmp(objv[0], objv[1], 1, nocase, reqlength); Tcl_SetObjResult(interp, Tcl_NewBooleanObj(match ? 0 : 1)); return TCL_OK; } @@ -2702,8 +2702,8 @@ TclStringCmp( Tcl_Obj *value2Ptr, int checkEq, /* comparison is only for equality */ int nocase, /* comparison is not case sensitive */ - int reqlength) /* requested length; -1 to compare whole - * strings */ + int reqlength) /* requested length in characters; -1 to + * compare whole strings */ { const char *s1, *s2; int empty, length, match, s1len, s2len; @@ -2731,10 +2731,10 @@ TclStringCmp( } else if ((value1Ptr->typePtr == &tclStringType) && (value2Ptr->typePtr == &tclStringType)) { /* - * Do a unicode-specific comparison if both of the args are of String + * Do a Unicode-specific comparison if both of the args are of String * type. If the char length == byte length, we can do a memcmp. In * benchmark testing this proved the most efficient check between the - * unicode and string comparison operations. + * Unicode and string comparison operations. */ if (nocase) { @@ -2748,6 +2748,9 @@ TclStringCmp( && (value1Ptr->bytes != NULL) && (s2len == value2Ptr->length) && (value2Ptr->bytes != NULL)) { + /* each byte represents one character so s1l3n, s2l3n, and + * reqlength are in both bytes and characters + */ s1 = value1Ptr->bytes; s2 = value2Ptr->bytes; memCmpFn = memcmp; @@ -2756,14 +2759,17 @@ TclStringCmp( s2 = (char *) Tcl_GetUnicode(value2Ptr); if ( #if defined(WORDS_BIGENDIAN) && (TCL_UTF_MAX != 4) - 1 + 1 #else - checkEq + checkEq #endif /* WORDS_BIGENDIAN */ - ) { + ) { memCmpFn = memcmp; s1len *= sizeof(Tcl_UniChar); s2len *= sizeof(Tcl_UniChar); + if (reqlength > 0) { + reqlength *= sizeof(Tcl_UniChar); + } } else { memCmpFn = (memCmpFn_t) Tcl_UniCharNcmp; } @@ -2805,7 +2811,7 @@ TclStringCmp( s2 = TclGetStringFromObj(value2Ptr, &s2len); } - if (!nocase && checkEq) { + if (!nocase && checkEq && reqlength < 0) { /* * When we have equal-length we can check only for (in)equality. * We can use memcmp() in all (n)eq cases because we don't need to @@ -2826,24 +2832,28 @@ TclStringCmp( s1len = Tcl_NumUtfChars(s1, s1len); s2len = Tcl_NumUtfChars(s2, s2len); memCmpFn = (memCmpFn_t) - (nocase ? Tcl_UtfNcasecmp : Tcl_UtfNcmp); + (nocase ? Tcl_UtfNcasecmp : Tcl_UtfNcmp); } } } + /* At this point s1len, s2len, and reqlength should by now have been + * adjusted so that they are all in the units expected by the selected + * comparison function. + */ + length = (s1len < s2len) ? s1len : s2len; if (reqlength > 0 && reqlength < length) { length = reqlength; } else if (reqlength < 0) { /* - * The requested length is negative, so we ignore it by setting it to - * length + 1 so we correct the match var. + * The requested length is negative, so ignore it by setting it to + * length + 1 to correct the match var. */ - reqlength = length + 1; } - if (checkEq && (s1len != s2len)) { + if (checkEq && reqlength < 0 && (s1len != s2len)) { match = 1; /* This will be reversed below. */ } else { /* diff --git a/tests/stringComp.test b/tests/stringComp.test index a17390d..95a738c 100644 --- a/tests/stringComp.test +++ b/tests/stringComp.test @@ -100,7 +100,7 @@ foreach {tname tbody tresult tcode} { {unicode} {string compare \334 \u00fc} -1 {} {unicode} {string compare \334\334\334\374\374 \334\334\334\334\334} 1 {} {high bit} { - # This test will fail if the underlying comparison + # This test fails if the underlying comparison # is using signed chars instead of unsigned chars. # (like SunOS's default memcmp thus the compat/memcmp.c) string compare "\x80" "@" @@ -156,10 +156,10 @@ foreach {tname tbody tresult tcode} { {-nocase null strings} { string compare -nocase foo "" } 1 {} - {with length, unequal strings} { + {with length, unequal strings, partial first string} { string compare -length 2 abc abde } 0 {} - {with length, unequal strings} { + {with length, unequal strings 2, full first string} { string compare -length 2 ab abde } 0 {} {with NUL character vs. other ASCII} { -- cgit v0.12 From 7faef9ce700c7dc01e1333046e7a69bdb7a45bbf Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Sun, 15 Jan 2023 19:45:54 +0000 Subject: Update documentation, matching current implementation --- doc/encoding.n | 13 ++++++------- generic/tclCmdAH.c | 4 ++-- tests/cmdAH.test | 24 ++++++++++++------------ tests/encoding.test | 4 ++-- tests/safe.test | 8 ++++---- 5 files changed, 26 insertions(+), 27 deletions(-) diff --git a/doc/encoding.n b/doc/encoding.n index 78580f2..d4b91e2 100644 --- a/doc/encoding.n +++ b/doc/encoding.n @@ -28,7 +28,7 @@ formats. Performs one of several encoding related operations, depending on \fIoption\fR. The legal \fIoption\fRs are: .TP -\fBencoding convertfrom\fR ?\fB-nocomplain\fR? ?\fB-failindex var\fR? ?\fB-strict\fR? ?\fIencoding\fR? \fIdata\fR +\fBencoding convertfrom\fR ?\fB-nocomplain\fR|\fB-strict\fR|\fB-failindex var\fR? ?\fIencoding\fR? \fIdata\fR . Convert \fIdata\fR to a Unicode string from the specified \fIencoding\fR. The characters in \fIdata\fR are 8 bit binary data. The resulting @@ -48,19 +48,19 @@ in case of a conversion error, the position of the input byte causing the error is returned in the given variable. The return value of the command are the converted characters until the first error position. In case of no error, the value \fI-1\fR is written to the variable. This option -may not be used together with \fB-nocomplain\fR. +may not be used together with \fB-nocomplain\fR or \fB-strict\fR. .PP The option \fB-nocomplain\fR has no effect and is available for compatibility with TCL 9. In TCL 9, the encoding command fails with an error on any encoding issue. This switch restores the TCL8.7 behaviour. .PP The \fB-strict\fR option follows more strict rules in conversion. For the \fButf-8\fR -encoder, it disallows the the sequence \fB\\xC0\\x80\fR and noncharacters (which - +encoder, it disallows invalid byte sequences and surrogates (which - otherwise - are just passed through). .VE "TCL8.7 TIP346, TIP607, TIP601" .RE .TP -\fBencoding convertto\fR ?\fB-nocomplain\fR? ?\fB-failindex var\fR? ?\fB-strict\fR? ?\fIencoding\fR? \fIstring\fR +\fBencoding convertto\fR ?\fB-nocomplain\fR|\fB-strict\fR|\fB-failindex var\fR? ?\fIencoding\fR? \fIstring\fR . Convert \fIstring\fR from Unicode to the specified \fIencoding\fR. The result is a sequence of bytes that represents the converted @@ -81,15 +81,14 @@ in case of a conversion error, the position of the input character causing the e is returned in the given variable. The return value of the command are the converted bytes until the first error position. No error condition is raised. In case of no error, the value \fI-1\fR is written to the variable. This option -may not be used together with \fB-nocomplain\fR. +may not be used together with \fB-nocomplain\fR or \fB-strict\fR. .PP The option \fB-nocomplain\fR has no effect and is available for compatibility with TCL 9. In TCL 9, the encoding command fails with an error on any encoding issue. This switch restores the TCL8.7 behaviour. .PP The \fB-strict\fR option follows more strict rules in conversion. For the \fButf-8\fR -encoder, it disallows the the sequence \fB\\xC0\\x80\fR and noncharacters (which - -otherwise - are just passed through). +encoder, it disallows surrogates (which - otherwise - are just passed through). .VE "TCL8.7 TIP346, TIP607, TIP601" .RE .TP diff --git a/generic/tclCmdAH.c b/generic/tclCmdAH.c index b4084d1..dd0a525 100644 --- a/generic/tclCmdAH.c +++ b/generic/tclCmdAH.c @@ -610,7 +610,7 @@ EncodingConvertfromObjCmd( } } else { encConvFromError: - Tcl_WrongNumArgs(interp, 1, objv, "?-nocomplain? ?-strict? ?-failindex var? ?encoding? data"); + Tcl_WrongNumArgs(interp, 1, objv, "?-nocomplain|-strict|-failindex var? ?encoding? data"); return TCL_ERROR; } @@ -749,7 +749,7 @@ EncodingConverttoObjCmd( } } else { encConvToError: - Tcl_WrongNumArgs(interp, 1, objv, "?-nocomplain? ?-strict? ?-failindex var? ?encoding? data"); + Tcl_WrongNumArgs(interp, 1, objv, "?-nocomplain|-strict|-failindex var? ?encoding? data"); return TCL_ERROR; } diff --git a/tests/cmdAH.test b/tests/cmdAH.test index cb7e1cf..9b853f5 100644 --- a/tests/cmdAH.test +++ b/tests/cmdAH.test @@ -179,7 +179,7 @@ test cmdAH-4.2 {Tcl_EncodingObjCmd} -returnCodes error -body { } -result {unknown or ambiguous subcommand "foo": must be convertfrom, convertto, dirs, names, or system} test cmdAH-4.3 {Tcl_EncodingObjCmd} -returnCodes error -body { encoding convertto -} -result {wrong # args: should be "encoding convertto ?-nocomplain? ?-strict? ?-failindex var? ?encoding? data"} +} -result {wrong # args: should be "encoding convertto ?-nocomplain|-strict|-failindex var? ?encoding? data"} test cmdAH-4.4 {Tcl_EncodingObjCmd} -returnCodes error -body { encoding convertto foo bar } -result {unknown encoding "foo"} @@ -201,7 +201,7 @@ test cmdAH-4.6 {Tcl_EncodingObjCmd} -setup { } -result 8C test cmdAH-4.7 {Tcl_EncodingObjCmd} -returnCodes error -body { encoding convertfrom -} -result {wrong # args: should be "encoding convertfrom ?-nocomplain? ?-strict? ?-failindex var? ?encoding? data"} +} -result {wrong # args: should be "encoding convertfrom ?-nocomplain|-strict|-failindex var? ?encoding? data"} test cmdAH-4.8 {Tcl_EncodingObjCmd} -returnCodes error -body { encoding convertfrom foo bar } -result {unknown encoding "foo"} @@ -238,10 +238,10 @@ test cmdAH-4.13 {Tcl_EncodingObjCmd} -setup { test cmdAH-4.14.1 {Syntax error, -nocomplain and -failindex, no encoding} -body { encoding convertfrom -nocomplain -failindex 2 ABC -} -returnCodes 1 -result {wrong # args: should be "encoding convertfrom ?-nocomplain? ?-strict? ?-failindex var? ?encoding? data"} +} -returnCodes 1 -result {wrong # args: should be "encoding convertfrom ?-nocomplain|-strict|-failindex var? ?encoding? data"} test cmdAH-4.14.2 {Syntax error, -nocomplain and -failindex, no encoding} -body { encoding convertto -nocomplain -failindex 2 ABC -} -returnCodes 1 -result {wrong # args: should be "encoding convertto ?-nocomplain? ?-strict? ?-failindex var? ?encoding? data"} +} -returnCodes 1 -result {wrong # args: should be "encoding convertto ?-nocomplain|-strict|-failindex var? ?encoding? data"} test cmdAH-4.15.1 {Syntax error, -failindex and -nocomplain, no encoding} -body { encoding convertfrom -failindex 2 -nocomplain ABC } -returnCodes 1 -result {unknown encoding "-nocomplain"} @@ -250,19 +250,19 @@ test cmdAH-4.15.2 {Syntax error, -failindex and -nocomplain, no encoding} -body } -returnCodes 1 -result {unknown encoding "-nocomplain"} test cmdAH-4.16.1 {Syntax error, -nocomplain and -failindex, encoding} -body { encoding convertfrom -nocomplain -failindex 2 utf-8 ABC -} -returnCodes 1 -result {wrong # args: should be "encoding convertfrom ?-nocomplain? ?-strict? ?-failindex var? ?encoding? data"} +} -returnCodes 1 -result {wrong # args: should be "encoding convertfrom ?-nocomplain|-strict|-failindex var? ?encoding? data"} test cmdAH-4.16.2 {Syntax error, -nocomplain and -failindex, encoding} -body { encoding convertto -nocomplain -failindex 2 utf-8 ABC -} -returnCodes 1 -result {wrong # args: should be "encoding convertto ?-nocomplain? ?-strict? ?-failindex var? ?encoding? data"} +} -returnCodes 1 -result {wrong # args: should be "encoding convertto ?-nocomplain|-strict|-failindex var? ?encoding? data"} test cmdAH-4.17.1 {Syntax error, -failindex and -nocomplain, encoding} -body { encoding convertfrom -failindex 2 -nocomplain utf-8 ABC -} -returnCodes 1 -result {wrong # args: should be "encoding convertfrom ?-nocomplain? ?-strict? ?-failindex var? ?encoding? data"} +} -returnCodes 1 -result {wrong # args: should be "encoding convertfrom ?-nocomplain|-strict|-failindex var? ?encoding? data"} test cmdAH-4.17.2 {Syntax error, -failindex and -nocomplain, encoding} -body { encoding convertto -failindex 2 -nocomplain utf-8 ABC -} -returnCodes 1 -result {wrong # args: should be "encoding convertto ?-nocomplain? ?-strict? ?-failindex var? ?encoding? data"} +} -returnCodes 1 -result {wrong # args: should be "encoding convertto ?-nocomplain|-strict|-failindex var? ?encoding? data"} test cmdAH-4.18.1 {Syntax error, -failindex with no var, no encoding} -body { encoding convertfrom -failindex ABC -} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertfrom ?-nocomplain? ?-strict? ?-failindex var? ?encoding? data"} +} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertfrom ?-nocomplain|-strict|-failindex var? ?encoding? data"} test cmdAH-4.18.2 {Syntax error, -failindex with no var, no encoding (byte compiled)} -setup { proc encoding_test {} { encoding convertfrom -failindex ABC @@ -270,12 +270,12 @@ test cmdAH-4.18.2 {Syntax error, -failindex with no var, no encoding (byte compi } -body { # Compile and execute encoding_test -} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertfrom ?-nocomplain? ?-strict? ?-failindex var? ?encoding? data"} -cleanup { +} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertfrom ?-nocomplain|-strict|-failindex var? ?encoding? data"} -cleanup { rename encoding_test "" } test cmdAH-4.18.3 {Syntax error, -failindex with no var, no encoding} -body { encoding convertto -failindex ABC -} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertto ?-nocomplain? ?-strict? ?-failindex var? ?encoding? data"} +} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertto ?-nocomplain|-strict|-failindex var? ?encoding? data"} test cmdAH-4.18.4 {Syntax error, -failindex with no var, no encoding (byte compiled)} -setup { proc encoding_test {} { encoding convertto -failindex ABC @@ -283,7 +283,7 @@ test cmdAH-4.18.4 {Syntax error, -failindex with no var, no encoding (byte compi } -body { # Compile and execute encoding_test -} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertto ?-nocomplain? ?-strict? ?-failindex var? ?encoding? data"} -cleanup { +} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertto ?-nocomplain|-strict|-failindex var? ?encoding? data"} -cleanup { rename encoding_test "" } test cmdAH-4.19.1 {convertrom -failindex with correct data} -body { diff --git a/tests/encoding.test b/tests/encoding.test index 5fd4e8c..a1d129e 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -681,10 +681,10 @@ test encoding-24.21 {Parse with -nocomplain but without providing encoding} { } 1 test encoding-24.22 {Syntax error, two encodings} -body { encoding convertfrom iso8859-1 utf-8 "ZX\uD800" -} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertfrom ?-nocomplain? ?-strict? ?-failindex var? ?encoding? data"} +} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertfrom ?-nocomplain|-strict|-failindex var? ?encoding? data"} test encoding-24.23 {Syntax error, two encodings} -body { encoding convertto iso8859-1 utf-8 "ZX\uD800" -} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertto ?-nocomplain? ?-strict? ?-failindex var? ?encoding? data"} +} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertto ?-nocomplain|-strict|-failindex var? ?encoding? data"} test encoding-24.24 {Parse invalid utf-8 with -strict} -body { encoding convertfrom -strict utf-8 "\xC0\x80\x00\x00" } -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xC0'} diff --git a/tests/safe.test b/tests/safe.test index e5d4d18..f2c0862 100644 --- a/tests/safe.test +++ b/tests/safe.test @@ -1473,7 +1473,7 @@ test safe-11.7 {testing safe encoding} -setup { interp eval $i encoding convertfrom } -returnCodes error -cleanup { safe::interpDelete $i -} -result {wrong # args: should be "encoding convertfrom ?-nocomplain? ?-strict? ?-failindex var? ?encoding? data"} +} -result {wrong # args: should be "encoding convertfrom ?-nocomplain|-strict|-failindex var? ?encoding? data"} test safe-11.7.1 {testing safe encoding} -setup { set i [safe::interpCreate] } -body { @@ -1482,7 +1482,7 @@ test safe-11.7.1 {testing safe encoding} -setup { } -match glob -cleanup { unset -nocomplain m o safe::interpDelete $i -} -result {wrong # args: should be "encoding convertfrom ?-nocomplain? ?-strict? ?-failindex var? ?encoding? data" +} -result {wrong # args: should be "encoding convertfrom ?-nocomplain|-strict|-failindex var? ?encoding? data" while executing "encoding convertfrom" invoked from within @@ -1495,7 +1495,7 @@ test safe-11.8 {testing safe encoding} -setup { interp eval $i encoding convertto } -returnCodes error -cleanup { safe::interpDelete $i -} -result {wrong # args: should be "encoding convertto ?-nocomplain? ?-strict? ?-failindex var? ?encoding? data"} +} -result {wrong # args: should be "encoding convertto ?-nocomplain|-strict|-failindex var? ?encoding? data"} test safe-11.8.1 {testing safe encoding} -setup { set i [safe::interpCreate] } -body { @@ -1504,7 +1504,7 @@ test safe-11.8.1 {testing safe encoding} -setup { } -match glob -cleanup { unset -nocomplain m o safe::interpDelete $i -} -result {wrong # args: should be "encoding convertto ?-nocomplain? ?-strict? ?-failindex var? ?encoding? data" +} -result {wrong # args: should be "encoding convertto ?-nocomplain|-strict|-failindex var? ?encoding? data" while executing "encoding convertto" invoked from within -- cgit v0.12 From 5021d1c11f0e3287ce96351e20e79552a92e7177 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Mon, 16 Jan 2023 13:10:07 +0000 Subject: New flag TCL_ENCODING_HACK_FLAG to control the behaviour. (This is NOT the way to do it, but it's only meant for experimenting) --- generic/tcl.h | 1 + generic/tclEncoding.c | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/generic/tcl.h b/generic/tcl.h index f373382..36e1a35 100644 --- a/generic/tcl.h +++ b/generic/tcl.h @@ -2145,6 +2145,7 @@ typedef struct Tcl_EncodingType { #define TCL_ENCODING_MODIFIED 0x20 #define TCL_ENCODING_NOCOMPLAIN 0x40 #define TCL_ENCODING_STRICT 0x44 +#define TCL_ENCODING_HACK_FLAG (1<<20) /* * The following definitions are the error codes returned by the conversion diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index cfad548..2c4382d 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -2409,7 +2409,7 @@ UtfToUtfProc( */ if (flags & TCL_ENCODING_MODIFIED) { - if (STOPONERROR) { + if ((STOPONERROR) && ((flags & TCL_ENCODING_CHAR_LIMIT) || (flags & TCL_ENCODING_HACK_FLAG))) { result = TCL_CONVERT_MULTIBYTE; break; } @@ -3199,7 +3199,7 @@ TableFromUtfProc( word = fromUnicode[(ch >> 8)][ch & 0xFF]; if ((word == 0) && (ch != 0)) { - if (STOPONERROR) { + if ((STOPONERROR) && (flags & TCL_ENCODING_CHAR_LIMIT)) { result = TCL_CONVERT_UNKNOWN; break; } -- cgit v0.12 From 925f00c5cad128feb5c4e49b7dd31edc205b4746 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Tue, 17 Jan 2023 14:15:39 +0000 Subject: Use TCL_ENCODING_HACK_FLAG in TableFromUtfProc too --- generic/tclEncoding.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 2c4382d..3b9ab3e 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -3199,7 +3199,7 @@ TableFromUtfProc( word = fromUnicode[(ch >> 8)][ch & 0xFF]; if ((word == 0) && (ch != 0)) { - if ((STOPONERROR) && (flags & TCL_ENCODING_CHAR_LIMIT)) { + if ((STOPONERROR) && ((flags & TCL_ENCODING_CHAR_LIMIT) || (flags & TCL_ENCODING_HACK_FLAG))) { result = TCL_CONVERT_UNKNOWN; break; } -- cgit v0.12 From 5d50502a2952145d5a6eaa4482ccd79628c1e16f Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Wed, 18 Jan 2023 13:26:15 +0000 Subject: Forget about TCL_ENCODING_HACK_FLAG, this should be the fix. Testing ... --- generic/tcl.h | 1 - generic/tclEncoding.c | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/generic/tcl.h b/generic/tcl.h index 36e1a35..f373382 100644 --- a/generic/tcl.h +++ b/generic/tcl.h @@ -2145,7 +2145,6 @@ typedef struct Tcl_EncodingType { #define TCL_ENCODING_MODIFIED 0x20 #define TCL_ENCODING_NOCOMPLAIN 0x40 #define TCL_ENCODING_STRICT 0x44 -#define TCL_ENCODING_HACK_FLAG (1<<20) /* * The following definitions are the error codes returned by the conversion diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 3b9ab3e..ca96057 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -2409,7 +2409,7 @@ UtfToUtfProc( */ if (flags & TCL_ENCODING_MODIFIED) { - if ((STOPONERROR) && ((flags & TCL_ENCODING_CHAR_LIMIT) || (flags & TCL_ENCODING_HACK_FLAG))) { + if ((STOPONERROR) && (flags & TCL_ENCODING_CHAR_LIMIT)) { result = TCL_CONVERT_MULTIBYTE; break; } @@ -3199,7 +3199,7 @@ TableFromUtfProc( word = fromUnicode[(ch >> 8)][ch & 0xFF]; if ((word == 0) && (ch != 0)) { - if ((STOPONERROR) && ((flags & TCL_ENCODING_CHAR_LIMIT) || (flags & TCL_ENCODING_HACK_FLAG))) { + if (STOPONERROR) { result = TCL_CONVERT_UNKNOWN; break; } -- cgit v0.12 From 31226696c3e4dd02735044bc1fbf316c0955e65a Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Wed, 18 Jan 2023 14:10:09 +0000 Subject: Some test-cases need -nocomplainencoding 1, because they use the fallback behavior. --- tests/chanio.test | 6 +++--- tests/io.test | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/chanio.test b/tests/chanio.test index 2189cc4..6b45da9 100644 --- a/tests/chanio.test +++ b/tests/chanio.test @@ -252,7 +252,7 @@ test chan-io-3.3 {WriteChars: compatibility with WriteBytes: flush on line} -bod test chan-io-3.4 {WriteChars: loop over stage buffer} -body { # stage buffer maps to more than can be queued at once. set f [open $path(test1) w] - chan configure $f -encoding jis0208 -buffersize 16 + chan configure $f -encoding jis0208 -buffersize 16 -nocomplainencoding 1 chan puts -nonewline $f "\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\" set x [list [contents $path(test1)]] chan close $f @@ -265,7 +265,7 @@ test chan-io-3.5 {WriteChars: saved != 0} -body { # be moved to beginning of next channel buffer to preserve requested # buffersize. set f [open $path(test1) w] - chan configure $f -encoding jis0208 -buffersize 17 + chan configure $f -encoding jis0208 -buffersize 17 -nocomplainencoding 1 chan puts -nonewline $f "\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\" set x [list [contents $path(test1)]] chan close $f @@ -298,7 +298,7 @@ test chan-io-3.7 {WriteChars: (bufPtr->nextAdded > bufPtr->length)} -body { # on flush. The truncated bytes are moved to the beginning of the next # channel buffer. set f [open $path(test1) w] - chan configure $f -encoding jis0208 -buffersize 17 + chan configure $f -encoding jis0208 -buffersize 17 -nocomplainencoding 1 chan puts -nonewline $f "\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\" set x [list [contents $path(test1)]] chan close $f diff --git a/tests/io.test b/tests/io.test index d10e1e4..d2e687d 100644 --- a/tests/io.test +++ b/tests/io.test @@ -272,7 +272,7 @@ test io-3.4 {WriteChars: loop over stage buffer} -body { # stage buffer maps to more than can be queued at once. set f [open $path(test1) w] - fconfigure $f -encoding jis0208 -buffersize 16 + fconfigure $f -encoding jis0208 -buffersize 16 -nocomplainencoding 1 puts -nonewline $f "\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\" set x [list [contents $path(test1)]] close $f @@ -286,7 +286,7 @@ test io-3.5 {WriteChars: saved != 0} -body { # requested buffersize. set f [open $path(test1) w] - fconfigure $f -encoding jis0208 -buffersize 17 + fconfigure $f -encoding jis0208 -buffersize 17 -nocomplainencoding 1 puts -nonewline $f "\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\" set x [list [contents $path(test1)]] close $f @@ -319,7 +319,7 @@ test io-3.7 {WriteChars: (bufPtr->nextAdded > bufPtr->length)} -body { # of the next channel buffer. set f [open $path(test1) w] - fconfigure $f -encoding jis0208 -buffersize 17 + fconfigure $f -encoding jis0208 -buffersize 17 -nocomplainencoding 1 puts -nonewline $f "\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\" set x [list [contents $path(test1)]] close $f -- cgit v0.12 From 22657ad365387756101eef242c94c8989688955c Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Thu, 19 Jan 2023 17:01:40 +0000 Subject: New proposal: Allow "-strict" immediately before or after "-failindex var". --- doc/encoding.n | 4 ++-- generic/tclCmdAH.c | 43 +++++++++++++++++++++++++++++++++----- generic/tclEncoding.c | 7 ++++--- generic/tclIO.h | 1 + tests/cmdAH.test | 58 ++++++++++++++++++++++++++++++++++++++++----------- tests/encoding.test | 4 ++-- tests/safe.test | 8 +++---- 7 files changed, 97 insertions(+), 28 deletions(-) diff --git a/doc/encoding.n b/doc/encoding.n index 7eae61a..24ca1c7 100644 --- a/doc/encoding.n +++ b/doc/encoding.n @@ -48,7 +48,7 @@ in case of a conversion error, the position of the input byte causing the error is returned in the given variable. The return value of the command are the converted characters until the first error position. In case of no error, the value \fI-1\fR is written to the variable. This option -may not be used together with \fB-nocomplain\fR, and it already implies \fB-strict\fR. +may not be used together with \fB-nocomplain\fR. .PP The option \fB-nocomplain\fR has no effect and is available for compatibility with TCL 9. In TCL 9, the encoding command fails with an error on any encoding issue. @@ -81,7 +81,7 @@ in case of a conversion error, the position of the input character causing the e is returned in the given variable. The return value of the command are the converted bytes until the first error position. No error condition is raised. In case of no error, the value \fI-1\fR is written to the variable. This option -may not be used together with \fB-nocomplain\fR, and it already implies \fB-strict\fR. +may not be used together with \fB-nocomplain\fR. .PP The option \fB-nocomplain\fR has no effect and is available for compatibility with TCL 9. In TCL 9, the encoding command fails with an error on any encoding issue. diff --git a/generic/tclCmdAH.c b/generic/tclCmdAH.c index 016bd02..72cc618 100644 --- a/generic/tclCmdAH.c +++ b/generic/tclCmdAH.c @@ -12,6 +12,7 @@ */ #include "tclInt.h" +#include "tclIO.h" #ifdef _WIN32 # include "tclWinInt.h" #endif @@ -574,7 +575,7 @@ EncodingConvertfromObjCmd( if (objc == 2) { encoding = Tcl_GetEncoding(interp, NULL); data = objv[1]; - } else if (objc > 2 && objc < 6) { + } else if (objc > 2 && objc < 7) { int objcUnprocessed = objc; data = objv[objc - 1]; bytesPtr = Tcl_GetString(objv[1]); @@ -586,6 +587,16 @@ EncodingConvertfromObjCmd( && !strncmp(bytesPtr, "-strict", strlen(bytesPtr))) { flags = TCL_ENCODING_STRICT; objcUnprocessed--; + bytesPtr = Tcl_GetString(objv[2]); + if (bytesPtr[0] == '-' && bytesPtr[1] == 'f' + && !strncmp(bytesPtr, "-failindex", strlen(bytesPtr))) { + /* at least two additional arguments needed */ + if (objc < 6) { + goto encConvFromError; + } + failVarObj = objv[3]; + objcUnprocessed -= 2; + } } else if (bytesPtr[0] == '-' && bytesPtr[1] == 'f' && !strncmp(bytesPtr, "-failindex", strlen(bytesPtr))) { /* at least two additional arguments needed */ @@ -593,8 +604,14 @@ EncodingConvertfromObjCmd( goto encConvFromError; } failVarObj = objv[2]; - flags = TCL_ENCODING_STRICT; + flags = ENCODING_FAILINDEX; objcUnprocessed -= 2; + bytesPtr = Tcl_GetString(objv[3]); + if (bytesPtr[0] == '-' && bytesPtr[1] == 's' + && !strncmp(bytesPtr, "-strict", strlen(bytesPtr))) { + flags = TCL_ENCODING_STRICT; + objcUnprocessed --; + } } switch (objcUnprocessed) { case 3: @@ -610,7 +627,7 @@ EncodingConvertfromObjCmd( } } else { encConvFromError: - Tcl_WrongNumArgs(interp, 1, objv, "?-nocomplain|-strict|-failindex var? ?encoding? data"); + Tcl_WrongNumArgs(interp, 1, objv, "?-nocomplain|?-strict? ?-failindex var?? ?encoding? data"); return TCL_ERROR; } @@ -725,6 +742,16 @@ EncodingConverttoObjCmd( && !strncmp(stringPtr, "-strict", strlen(stringPtr))) { flags = TCL_ENCODING_STRICT; objcUnprocessed--; + stringPtr = Tcl_GetString(objv[2]); + if (stringPtr[0] == '-' && stringPtr[1] == 'f' + && !strncmp(stringPtr, "-failindex", strlen(stringPtr))) { + /* at least two additional arguments needed */ + if (objc < 6) { + goto encConvToError; + } + failVarObj = objv[3]; + objcUnprocessed -= 2; + } } else if (stringPtr[0] == '-' && stringPtr[1] == 'f' && !strncmp(stringPtr, "-failindex", strlen(stringPtr))) { /* at least two additional arguments needed */ @@ -732,8 +759,14 @@ EncodingConverttoObjCmd( goto encConvToError; } failVarObj = objv[2]; - flags = TCL_ENCODING_STRICT; + flags = TCL_ENCODING_STOPONERROR; objcUnprocessed -= 2; + stringPtr = Tcl_GetString(objv[3]); + if (stringPtr[0] == '-' && stringPtr[1] == 's' + && !strncmp(stringPtr, "-strict", strlen(stringPtr))) { + flags = TCL_ENCODING_STRICT; + objcUnprocessed --; + } } switch (objcUnprocessed) { case 3: @@ -749,7 +782,7 @@ EncodingConverttoObjCmd( } } else { encConvToError: - Tcl_WrongNumArgs(interp, 1, objv, "?-nocomplain|-strict|-failindex var? ?encoding? data"); + Tcl_WrongNumArgs(interp, 1, objv, "?-nocomplain|?-strict? ?-failindex var?? ?encoding? data"); return TCL_ERROR; } diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index ca96057..2f7d803 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -10,6 +10,7 @@ */ #include "tclInt.h" +#include "tclIO.h" typedef size_t (LengthProc)(const char *src); @@ -2386,9 +2387,9 @@ UtfToUtfProc( *dst++ = *src++; } else if ((UCHAR(*src) == 0xC0) && (src + 1 < srcEnd) - && (UCHAR(src[1]) == 0x80) && (!(flags & TCL_ENCODING_MODIFIED) || ((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT))) { + && (UCHAR(src[1]) == 0x80) && (!(flags & TCL_ENCODING_MODIFIED) || ((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT) || (flags & ENCODING_FAILINDEX))) { /* - * If in input mode, and -strict is specified: This is an error. + * If in input mode, and -strict or -failindex is specified: This is an error. */ if (flags & TCL_ENCODING_MODIFIED) { result = TCL_CONVERT_SYNTAX; @@ -2413,7 +2414,7 @@ UtfToUtfProc( result = TCL_CONVERT_MULTIBYTE; break; } - if (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT)) { + if (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT) || (flags & ENCODING_FAILINDEX)) { result = TCL_CONVERT_SYNTAX; break; } diff --git a/generic/tclIO.h b/generic/tclIO.h index fbd01ee..a69e990 100644 --- a/generic/tclIO.h +++ b/generic/tclIO.h @@ -284,6 +284,7 @@ typedef struct ChannelState { * usable, but it may not be closed * again from within the close * handler. */ +#define ENCODING_FAILINDEX (1<<20) /* Internal flag, fail on Invalid bytes only */ #define CHANNEL_CLOSEDWRITE (1<<21) /* Channel write side has been closed. * No further Tcl-level write IO on * the channel is allowed. */ diff --git a/tests/cmdAH.test b/tests/cmdAH.test index 3533cb6..9d51951 100644 --- a/tests/cmdAH.test +++ b/tests/cmdAH.test @@ -179,7 +179,7 @@ test cmdAH-4.2 {Tcl_EncodingObjCmd} -returnCodes error -body { } -result {unknown or ambiguous subcommand "foo": must be convertfrom, convertto, dirs, names, or system} test cmdAH-4.3 {Tcl_EncodingObjCmd} -returnCodes error -body { encoding convertto -} -result {wrong # args: should be "encoding convertto ?-nocomplain|-strict|-failindex var? ?encoding? data"} +} -result {wrong # args: should be "encoding convertto ?-nocomplain|?-strict? ?-failindex var?? ?encoding? data"} test cmdAH-4.4 {Tcl_EncodingObjCmd} -returnCodes error -body { encoding convertto foo bar } -result {unknown encoding "foo"} @@ -201,7 +201,7 @@ test cmdAH-4.6 {Tcl_EncodingObjCmd} -setup { } -result 8C test cmdAH-4.7 {Tcl_EncodingObjCmd} -returnCodes error -body { encoding convertfrom -} -result {wrong # args: should be "encoding convertfrom ?-nocomplain|-strict|-failindex var? ?encoding? data"} +} -result {wrong # args: should be "encoding convertfrom ?-nocomplain|?-strict? ?-failindex var?? ?encoding? data"} test cmdAH-4.8 {Tcl_EncodingObjCmd} -returnCodes error -body { encoding convertfrom foo bar } -result {unknown encoding "foo"} @@ -238,10 +238,10 @@ test cmdAH-4.13 {Tcl_EncodingObjCmd} -setup { test cmdAH-4.14.1 {Syntax error, -nocomplain and -failindex, no encoding} -body { encoding convertfrom -nocomplain -failindex 2 ABC -} -returnCodes 1 -result {wrong # args: should be "encoding convertfrom ?-nocomplain|-strict|-failindex var? ?encoding? data"} +} -returnCodes 1 -result {wrong # args: should be "encoding convertfrom ?-nocomplain|?-strict? ?-failindex var?? ?encoding? data"} test cmdAH-4.14.2 {Syntax error, -nocomplain and -failindex, no encoding} -body { encoding convertto -nocomplain -failindex 2 ABC -} -returnCodes 1 -result {wrong # args: should be "encoding convertto ?-nocomplain|-strict|-failindex var? ?encoding? data"} +} -returnCodes 1 -result {wrong # args: should be "encoding convertto ?-nocomplain|?-strict? ?-failindex var?? ?encoding? data"} test cmdAH-4.15.1 {Syntax error, -failindex and -nocomplain, no encoding} -body { encoding convertfrom -failindex 2 -nocomplain ABC } -returnCodes 1 -result {unknown encoding "-nocomplain"} @@ -250,19 +250,19 @@ test cmdAH-4.15.2 {Syntax error, -failindex and -nocomplain, no encoding} -body } -returnCodes 1 -result {unknown encoding "-nocomplain"} test cmdAH-4.16.1 {Syntax error, -nocomplain and -failindex, encoding} -body { encoding convertfrom -nocomplain -failindex 2 utf-8 ABC -} -returnCodes 1 -result {wrong # args: should be "encoding convertfrom ?-nocomplain|-strict|-failindex var? ?encoding? data"} +} -returnCodes 1 -result {wrong # args: should be "encoding convertfrom ?-nocomplain|?-strict? ?-failindex var?? ?encoding? data"} test cmdAH-4.16.2 {Syntax error, -nocomplain and -failindex, encoding} -body { encoding convertto -nocomplain -failindex 2 utf-8 ABC -} -returnCodes 1 -result {wrong # args: should be "encoding convertto ?-nocomplain|-strict|-failindex var? ?encoding? data"} +} -returnCodes 1 -result {wrong # args: should be "encoding convertto ?-nocomplain|?-strict? ?-failindex var?? ?encoding? data"} test cmdAH-4.17.1 {Syntax error, -failindex and -nocomplain, encoding} -body { encoding convertfrom -failindex 2 -nocomplain utf-8 ABC -} -returnCodes 1 -result {wrong # args: should be "encoding convertfrom ?-nocomplain|-strict|-failindex var? ?encoding? data"} +} -returnCodes 1 -result {wrong # args: should be "encoding convertfrom ?-nocomplain|?-strict? ?-failindex var?? ?encoding? data"} test cmdAH-4.17.2 {Syntax error, -failindex and -nocomplain, encoding} -body { encoding convertto -failindex 2 -nocomplain utf-8 ABC -} -returnCodes 1 -result {wrong # args: should be "encoding convertto ?-nocomplain|-strict|-failindex var? ?encoding? data"} +} -returnCodes 1 -result {wrong # args: should be "encoding convertto ?-nocomplain|?-strict? ?-failindex var?? ?encoding? data"} test cmdAH-4.18.1 {Syntax error, -failindex with no var, no encoding} -body { encoding convertfrom -failindex ABC -} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertfrom ?-nocomplain|-strict|-failindex var? ?encoding? data"} +} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertfrom ?-nocomplain|?-strict? ?-failindex var?? ?encoding? data"} test cmdAH-4.18.2 {Syntax error, -failindex with no var, no encoding (byte compiled)} -setup { proc encoding_test {} { encoding convertfrom -failindex ABC @@ -270,12 +270,12 @@ test cmdAH-4.18.2 {Syntax error, -failindex with no var, no encoding (byte compi } -body { # Compile and execute encoding_test -} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertfrom ?-nocomplain|-strict|-failindex var? ?encoding? data"} -cleanup { +} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertfrom ?-nocomplain|?-strict? ?-failindex var?? ?encoding? data"} -cleanup { rename encoding_test "" } test cmdAH-4.18.3 {Syntax error, -failindex with no var, no encoding} -body { encoding convertto -failindex ABC -} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertto ?-nocomplain|-strict|-failindex var? ?encoding? data"} +} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertto ?-nocomplain|?-strict? ?-failindex var?? ?encoding? data"} test cmdAH-4.18.4 {Syntax error, -failindex with no var, no encoding (byte compiled)} -setup { proc encoding_test {} { encoding convertto -failindex ABC @@ -283,7 +283,7 @@ test cmdAH-4.18.4 {Syntax error, -failindex with no var, no encoding (byte compi } -body { # Compile and execute encoding_test -} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertto ?-nocomplain|-strict|-failindex var? ?encoding? data"} -cleanup { +} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertto ?-nocomplain|?-strict? ?-failindex var?? ?encoding? data"} -cleanup { rename encoding_test "" } test cmdAH-4.19.1 {convertrom -failindex with correct data} -body { @@ -333,6 +333,40 @@ test cmdAH-4.20.2 {convertrom -failindex with incomplete utf8 (byte compiled)} - } -returnCodes 0 -result {41 1} -cleanup { rename encoding_test "" } +test cmdAH-4.20.3 {convertrom -failindex with incomplete utf8} -body { + set x [encoding convertfrom -strict -failindex i utf-8 A\xc3] + binary scan $x H* y + list $y $i +} -returnCodes 0 -result {41 1} +test cmdAH-4.20.4 {convertrom -failindex with incomplete utf8 (byte compiled)} -setup { + proc encoding_test {} { + set x [encoding convertfrom -strict -failindex i utf-8 A\xc3] + binary scan $x H* y + list $y $i + } +} -body { + # Compile and execute + encoding_test +} -returnCodes 0 -result {41 1} -cleanup { + rename encoding_test "" +} +test cmdAH-4.20.5 {convertrom -failindex with incomplete utf8} -body { + set x [encoding convertfrom -failindex i -strict utf-8 A\xc3] + binary scan $x H* y + list $y $i +} -returnCodes 0 -result {41 1} +test cmdAH-4.20.6 {convertrom -failindex with incomplete utf8 (byte compiled)} -setup { + proc encoding_test {} { + set x [encoding convertfrom -failindex i -strict utf-8 A\xc3] + binary scan $x H* y + list $y $i + } +} -body { + # Compile and execute + encoding_test +} -returnCodes 0 -result {41 1} -cleanup { + rename encoding_test "" +} test cmdAH-4.21.1 {convertto -failindex with wrong character} -body { set x [encoding convertto -failindex i iso8859-1 A\u0141] binary scan $x H* y diff --git a/tests/encoding.test b/tests/encoding.test index a1d129e..095672c 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -681,10 +681,10 @@ test encoding-24.21 {Parse with -nocomplain but without providing encoding} { } 1 test encoding-24.22 {Syntax error, two encodings} -body { encoding convertfrom iso8859-1 utf-8 "ZX\uD800" -} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertfrom ?-nocomplain|-strict|-failindex var? ?encoding? data"} +} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertfrom ?-nocomplain|?-strict? ?-failindex var?? ?encoding? data"} test encoding-24.23 {Syntax error, two encodings} -body { encoding convertto iso8859-1 utf-8 "ZX\uD800" -} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertto ?-nocomplain|-strict|-failindex var? ?encoding? data"} +} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertto ?-nocomplain|?-strict? ?-failindex var?? ?encoding? data"} test encoding-24.24 {Parse invalid utf-8 with -strict} -body { encoding convertfrom -strict utf-8 "\xC0\x80\x00\x00" } -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xC0'} diff --git a/tests/safe.test b/tests/safe.test index f2c0862..be1ce57 100644 --- a/tests/safe.test +++ b/tests/safe.test @@ -1473,7 +1473,7 @@ test safe-11.7 {testing safe encoding} -setup { interp eval $i encoding convertfrom } -returnCodes error -cleanup { safe::interpDelete $i -} -result {wrong # args: should be "encoding convertfrom ?-nocomplain|-strict|-failindex var? ?encoding? data"} +} -result {wrong # args: should be "encoding convertfrom ?-nocomplain|?-strict? ?-failindex var?? ?encoding? data"} test safe-11.7.1 {testing safe encoding} -setup { set i [safe::interpCreate] } -body { @@ -1482,7 +1482,7 @@ test safe-11.7.1 {testing safe encoding} -setup { } -match glob -cleanup { unset -nocomplain m o safe::interpDelete $i -} -result {wrong # args: should be "encoding convertfrom ?-nocomplain|-strict|-failindex var? ?encoding? data" +} -result {wrong # args: should be "encoding convertfrom ?-nocomplain|?-strict? ?-failindex var?? ?encoding? data" while executing "encoding convertfrom" invoked from within @@ -1495,7 +1495,7 @@ test safe-11.8 {testing safe encoding} -setup { interp eval $i encoding convertto } -returnCodes error -cleanup { safe::interpDelete $i -} -result {wrong # args: should be "encoding convertto ?-nocomplain|-strict|-failindex var? ?encoding? data"} +} -result {wrong # args: should be "encoding convertto ?-nocomplain|?-strict? ?-failindex var?? ?encoding? data"} test safe-11.8.1 {testing safe encoding} -setup { set i [safe::interpCreate] } -body { @@ -1504,7 +1504,7 @@ test safe-11.8.1 {testing safe encoding} -setup { } -match glob -cleanup { unset -nocomplain m o safe::interpDelete $i -} -result {wrong # args: should be "encoding convertto ?-nocomplain|-strict|-failindex var? ?encoding? data" +} -result {wrong # args: should be "encoding convertto ?-nocomplain|?-strict? ?-failindex var?? ?encoding? data" while executing "encoding convertto" invoked from within -- cgit v0.12 From c611835152c63f8995c427ab9684ecf6125ec2f8 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Thu, 19 Jan 2023 21:36:05 +0000 Subject: Proposed fix for [3e8074aea7]: [interp limit time -seconds] has a y2k38 problem --- generic/tclInterp.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/generic/tclInterp.c b/generic/tclInterp.c index 11202ce..613a86a 100644 --- a/generic/tclInterp.c +++ b/generic/tclInterp.c @@ -4686,7 +4686,7 @@ ChildTimeLimitCmd( Tcl_Time limitMoment; Tcl_LimitGetTime(childInterp, &limitMoment); - Tcl_SetObjResult(interp, Tcl_NewLongObj(limitMoment.sec)); + Tcl_SetObjResult(interp, Tcl_NewWideIntObj(limitMoment.sec)); } break; } @@ -4744,25 +4744,27 @@ ChildTimeLimitCmd( } limitMoment.usec = ((long) tmp)*1000; break; - case OPT_SEC: + case OPT_SEC: { + Tcl_WideInt sec; secObj = objv[i+1]; (void) Tcl_GetStringFromObj(objv[i+1], &secLen); if (secLen == 0) { break; } - if (TclGetIntFromObj(interp, objv[i+1], &tmp) != TCL_OK) { + if (TclGetWideIntFromObj(interp, objv[i+1], &sec) != TCL_OK) { return TCL_ERROR; } - if (tmp < 0) { + if (sec < 0) { Tcl_SetObjResult(interp, Tcl_NewStringObj( "seconds must be at least 0", -1)); Tcl_SetErrorCode(interp, "TCL", "OPERATION", "INTERP", "BADVALUE", NULL); return TCL_ERROR; } - limitMoment.sec = tmp; + limitMoment.sec = sec; break; } + } } if (milliObj != NULL || secObj != NULL) { if (milliObj != NULL) { -- cgit v0.12 From 073e6715d127e9c252cdc1c852445d0f983e8e27 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Fri, 20 Jan 2023 16:51:50 +0000 Subject: Better error-message --- generic/tclCmdAH.c | 9 +++++++-- tests/cmdAH.test | 24 ++++++++++++------------ tests/encoding.test | 4 ++-- tests/safe.test | 8 ++++---- 4 files changed, 25 insertions(+), 20 deletions(-) diff --git a/generic/tclCmdAH.c b/generic/tclCmdAH.c index 72cc618..b52e2fc 100644 --- a/generic/tclCmdAH.c +++ b/generic/tclCmdAH.c @@ -627,7 +627,9 @@ EncodingConvertfromObjCmd( } } else { encConvFromError: - Tcl_WrongNumArgs(interp, 1, objv, "?-nocomplain|?-strict? ?-failindex var?? ?encoding? data"); + Tcl_WrongNumArgs(interp, 1, objv, "?-strict? ?-failindex var? ?encoding? data"); + ((Interp *) interp)->flags |= INTERP_ALTERNATE_WRONG_ARGS; + Tcl_WrongNumArgs(interp, 1, objv, "-nocomplain ?encoding? data"); return TCL_ERROR; } @@ -782,7 +784,10 @@ EncodingConverttoObjCmd( } } else { encConvToError: - Tcl_WrongNumArgs(interp, 1, objv, "?-nocomplain|?-strict? ?-failindex var?? ?encoding? data"); + Tcl_WrongNumArgs(interp, 1, objv, "?-strict? ?-failindex var? ?encoding? data"); + ((Interp *) interp)->flags |= INTERP_ALTERNATE_WRONG_ARGS; + Tcl_WrongNumArgs(interp, 1, objv, "-nocomplain ?encoding? data"); + return TCL_ERROR; } diff --git a/tests/cmdAH.test b/tests/cmdAH.test index 9d51951..d7a3657 100644 --- a/tests/cmdAH.test +++ b/tests/cmdAH.test @@ -179,7 +179,7 @@ test cmdAH-4.2 {Tcl_EncodingObjCmd} -returnCodes error -body { } -result {unknown or ambiguous subcommand "foo": must be convertfrom, convertto, dirs, names, or system} test cmdAH-4.3 {Tcl_EncodingObjCmd} -returnCodes error -body { encoding convertto -} -result {wrong # args: should be "encoding convertto ?-nocomplain|?-strict? ?-failindex var?? ?encoding? data"} +} -result {wrong # args: should be "encoding convertto ?-strict? ?-failindex var? ?encoding? data" or "encoding convertto -nocomplain ?encoding? data"} test cmdAH-4.4 {Tcl_EncodingObjCmd} -returnCodes error -body { encoding convertto foo bar } -result {unknown encoding "foo"} @@ -201,7 +201,7 @@ test cmdAH-4.6 {Tcl_EncodingObjCmd} -setup { } -result 8C test cmdAH-4.7 {Tcl_EncodingObjCmd} -returnCodes error -body { encoding convertfrom -} -result {wrong # args: should be "encoding convertfrom ?-nocomplain|?-strict? ?-failindex var?? ?encoding? data"} +} -result {wrong # args: should be "encoding convertfrom ?-strict? ?-failindex var? ?encoding? data" or "encoding convertfrom -nocomplain ?encoding? data"} test cmdAH-4.8 {Tcl_EncodingObjCmd} -returnCodes error -body { encoding convertfrom foo bar } -result {unknown encoding "foo"} @@ -238,10 +238,10 @@ test cmdAH-4.13 {Tcl_EncodingObjCmd} -setup { test cmdAH-4.14.1 {Syntax error, -nocomplain and -failindex, no encoding} -body { encoding convertfrom -nocomplain -failindex 2 ABC -} -returnCodes 1 -result {wrong # args: should be "encoding convertfrom ?-nocomplain|?-strict? ?-failindex var?? ?encoding? data"} +} -returnCodes 1 -result {wrong # args: should be "encoding convertfrom ?-strict? ?-failindex var? ?encoding? data" or "encoding convertfrom -nocomplain ?encoding? data"} test cmdAH-4.14.2 {Syntax error, -nocomplain and -failindex, no encoding} -body { encoding convertto -nocomplain -failindex 2 ABC -} -returnCodes 1 -result {wrong # args: should be "encoding convertto ?-nocomplain|?-strict? ?-failindex var?? ?encoding? data"} +} -returnCodes 1 -result {wrong # args: should be "encoding convertto ?-strict? ?-failindex var? ?encoding? data" or "encoding convertto -nocomplain ?encoding? data"} test cmdAH-4.15.1 {Syntax error, -failindex and -nocomplain, no encoding} -body { encoding convertfrom -failindex 2 -nocomplain ABC } -returnCodes 1 -result {unknown encoding "-nocomplain"} @@ -250,19 +250,19 @@ test cmdAH-4.15.2 {Syntax error, -failindex and -nocomplain, no encoding} -body } -returnCodes 1 -result {unknown encoding "-nocomplain"} test cmdAH-4.16.1 {Syntax error, -nocomplain and -failindex, encoding} -body { encoding convertfrom -nocomplain -failindex 2 utf-8 ABC -} -returnCodes 1 -result {wrong # args: should be "encoding convertfrom ?-nocomplain|?-strict? ?-failindex var?? ?encoding? data"} +} -returnCodes 1 -result {wrong # args: should be "encoding convertfrom ?-strict? ?-failindex var? ?encoding? data" or "encoding convertfrom -nocomplain ?encoding? data"} test cmdAH-4.16.2 {Syntax error, -nocomplain and -failindex, encoding} -body { encoding convertto -nocomplain -failindex 2 utf-8 ABC -} -returnCodes 1 -result {wrong # args: should be "encoding convertto ?-nocomplain|?-strict? ?-failindex var?? ?encoding? data"} +} -returnCodes 1 -result {wrong # args: should be "encoding convertto ?-strict? ?-failindex var? ?encoding? data" or "encoding convertto -nocomplain ?encoding? data"} test cmdAH-4.17.1 {Syntax error, -failindex and -nocomplain, encoding} -body { encoding convertfrom -failindex 2 -nocomplain utf-8 ABC -} -returnCodes 1 -result {wrong # args: should be "encoding convertfrom ?-nocomplain|?-strict? ?-failindex var?? ?encoding? data"} +} -returnCodes 1 -result {wrong # args: should be "encoding convertfrom ?-strict? ?-failindex var? ?encoding? data" or "encoding convertfrom -nocomplain ?encoding? data"} test cmdAH-4.17.2 {Syntax error, -failindex and -nocomplain, encoding} -body { encoding convertto -failindex 2 -nocomplain utf-8 ABC -} -returnCodes 1 -result {wrong # args: should be "encoding convertto ?-nocomplain|?-strict? ?-failindex var?? ?encoding? data"} +} -returnCodes 1 -result {wrong # args: should be "encoding convertto ?-strict? ?-failindex var? ?encoding? data" or "encoding convertto -nocomplain ?encoding? data"} test cmdAH-4.18.1 {Syntax error, -failindex with no var, no encoding} -body { encoding convertfrom -failindex ABC -} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertfrom ?-nocomplain|?-strict? ?-failindex var?? ?encoding? data"} +} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertfrom ?-strict? ?-failindex var? ?encoding? data" or "::tcl::encoding::convertfrom -nocomplain ?encoding? data"} test cmdAH-4.18.2 {Syntax error, -failindex with no var, no encoding (byte compiled)} -setup { proc encoding_test {} { encoding convertfrom -failindex ABC @@ -270,12 +270,12 @@ test cmdAH-4.18.2 {Syntax error, -failindex with no var, no encoding (byte compi } -body { # Compile and execute encoding_test -} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertfrom ?-nocomplain|?-strict? ?-failindex var?? ?encoding? data"} -cleanup { +} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertfrom ?-strict? ?-failindex var? ?encoding? data" or "::tcl::encoding::convertfrom -nocomplain ?encoding? data"} -cleanup { rename encoding_test "" } test cmdAH-4.18.3 {Syntax error, -failindex with no var, no encoding} -body { encoding convertto -failindex ABC -} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertto ?-nocomplain|?-strict? ?-failindex var?? ?encoding? data"} +} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertto ?-strict? ?-failindex var? ?encoding? data" or "::tcl::encoding::convertto -nocomplain ?encoding? data"} test cmdAH-4.18.4 {Syntax error, -failindex with no var, no encoding (byte compiled)} -setup { proc encoding_test {} { encoding convertto -failindex ABC @@ -283,7 +283,7 @@ test cmdAH-4.18.4 {Syntax error, -failindex with no var, no encoding (byte compi } -body { # Compile and execute encoding_test -} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertto ?-nocomplain|?-strict? ?-failindex var?? ?encoding? data"} -cleanup { +} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertto ?-strict? ?-failindex var? ?encoding? data" or "::tcl::encoding::convertto -nocomplain ?encoding? data"} -cleanup { rename encoding_test "" } test cmdAH-4.19.1 {convertrom -failindex with correct data} -body { diff --git a/tests/encoding.test b/tests/encoding.test index 095672c..61676ea 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -681,10 +681,10 @@ test encoding-24.21 {Parse with -nocomplain but without providing encoding} { } 1 test encoding-24.22 {Syntax error, two encodings} -body { encoding convertfrom iso8859-1 utf-8 "ZX\uD800" -} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertfrom ?-nocomplain|?-strict? ?-failindex var?? ?encoding? data"} +} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertfrom ?-strict? ?-failindex var? ?encoding? data" or "::tcl::encoding::convertfrom -nocomplain ?encoding? data"} test encoding-24.23 {Syntax error, two encodings} -body { encoding convertto iso8859-1 utf-8 "ZX\uD800" -} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertto ?-nocomplain|?-strict? ?-failindex var?? ?encoding? data"} +} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertto ?-strict? ?-failindex var? ?encoding? data" or "::tcl::encoding::convertto -nocomplain ?encoding? data"} test encoding-24.24 {Parse invalid utf-8 with -strict} -body { encoding convertfrom -strict utf-8 "\xC0\x80\x00\x00" } -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xC0'} diff --git a/tests/safe.test b/tests/safe.test index be1ce57..ee81783 100644 --- a/tests/safe.test +++ b/tests/safe.test @@ -1473,7 +1473,7 @@ test safe-11.7 {testing safe encoding} -setup { interp eval $i encoding convertfrom } -returnCodes error -cleanup { safe::interpDelete $i -} -result {wrong # args: should be "encoding convertfrom ?-nocomplain|?-strict? ?-failindex var?? ?encoding? data"} +} -result {wrong # args: should be "encoding convertfrom ?-strict? ?-failindex var? ?encoding? data" or "encoding convertfrom -nocomplain ?encoding? data"} test safe-11.7.1 {testing safe encoding} -setup { set i [safe::interpCreate] } -body { @@ -1482,7 +1482,7 @@ test safe-11.7.1 {testing safe encoding} -setup { } -match glob -cleanup { unset -nocomplain m o safe::interpDelete $i -} -result {wrong # args: should be "encoding convertfrom ?-nocomplain|?-strict? ?-failindex var?? ?encoding? data" +} -result {wrong # args: should be "encoding convertfrom ?-strict? ?-failindex var? ?encoding? data" or "encoding convertfrom -nocomplain ?encoding? data" while executing "encoding convertfrom" invoked from within @@ -1495,7 +1495,7 @@ test safe-11.8 {testing safe encoding} -setup { interp eval $i encoding convertto } -returnCodes error -cleanup { safe::interpDelete $i -} -result {wrong # args: should be "encoding convertto ?-nocomplain|?-strict? ?-failindex var?? ?encoding? data"} +} -result {wrong # args: should be "encoding convertto ?-strict? ?-failindex var? ?encoding? data" or "encoding convertto -nocomplain ?encoding? data"} test safe-11.8.1 {testing safe encoding} -setup { set i [safe::interpCreate] } -body { @@ -1504,7 +1504,7 @@ test safe-11.8.1 {testing safe encoding} -setup { } -match glob -cleanup { unset -nocomplain m o safe::interpDelete $i -} -result {wrong # args: should be "encoding convertto ?-nocomplain|?-strict? ?-failindex var?? ?encoding? data" +} -result {wrong # args: should be "encoding convertto ?-strict? ?-failindex var? ?encoding? data" or "encoding convertto -nocomplain ?encoding? data" while executing "encoding convertto" invoked from within -- cgit v0.12 From 792c57a7fb9f7db346f92861d380a32d1c31ea8a Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Fri, 20 Jan 2023 20:59:22 +0000 Subject: Make documentation conform to implementation in this branch --- doc/encoding.n | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/doc/encoding.n b/doc/encoding.n index 24ca1c7..9577da3 100644 --- a/doc/encoding.n +++ b/doc/encoding.n @@ -28,7 +28,8 @@ formats. Performs one of several encoding related operations, depending on \fIoption\fR. The legal \fIoption\fRs are: .TP -\fBencoding convertfrom\fR ?\fB-nocomplain\fR|\fB-strict\fR|\fB-failindex var\fR? ?\fIencoding\fR? \fIdata\fR +\fBencoding convertfrom\fR ?\fB-strict\fR? ?\fB-failindex var\fR? ?\fIencoding\fR? \fIdata\fR +\fBencoding convertfrom\fR \fB-nocomplain\fR ?\fIencoding\fR? \fIdata\fR . Convert \fIdata\fR to a Unicode string from the specified \fIencoding\fR. The characters in \fIdata\fR are 8 bit binary data. The resulting @@ -56,11 +57,13 @@ This switch restores the TCL8.7 behaviour. .PP The \fB-strict\fR option follows more strict rules in conversion. For the \fButf-8\fR encoder, it disallows invalid byte sequences and surrogates (which - -otherwise - are just passed through). +otherwise - are just passed through). This option may not be used together +with \fB-nocomplain\fR. .VE "TCL8.7 TIP346, TIP607, TIP601" .RE .TP -\fBencoding convertto\fR ?\fB-nocomplain\fR|\fB-strict\fR|\fB-failindex var\fR? ?\fIencoding\fR? \fIstring\fR +\fBencoding convertto\fR ?\fB-strict\fR? ?\fB-failindex var\fR? ?\fIencoding\fR? \fIdata\fR +\fBencoding convertto\fR \fB-nocomplain\fR ?\fIencoding\fR? \fIdata\fR . Convert \fIstring\fR from Unicode to the specified \fIencoding\fR. The result is a sequence of bytes that represents the converted @@ -88,7 +91,8 @@ TCL 9. In TCL 9, the encoding command fails with an error on any encoding issue. This switch restores the TCL8.7 behaviour. .PP The \fB-strict\fR option follows more strict rules in conversion. For the \fButf-8\fR -encoder, it disallows surrogates (which - otherwise - are just passed through). +encoder, it disallows surrogates (which - otherwise - are just passed through). This +option may not be used together with \fB-nocomplain\fR. .VE "TCL8.7 TIP346, TIP607, TIP601" .RE .TP -- cgit v0.12 From 52948a790cfe853df5cdbecc3c5436685b6210ba Mon Sep 17 00:00:00 2001 From: Kevin B Kenny Date: Sun, 22 Jan 2023 01:33:44 +0000 Subject: Bug [e3dcab1d14] fix --- generic/tclStrToD.c | 59 +++++++++++++++++++++++++++-------------------------- tests/expr.test | 37 +++++++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+), 29 deletions(-) diff --git a/generic/tclStrToD.c b/generic/tclStrToD.c index 557eaa1..972b5fd 100644 --- a/generic/tclStrToD.c +++ b/generic/tclStrToD.c @@ -263,34 +263,35 @@ static const int log2pow5[27] = { }; #define N_LOG2POW5 27 -static const Tcl_WideUInt wuipow5[27] = { - (Tcl_WideUInt) 1, /* 5**0 */ - (Tcl_WideUInt) 5, - (Tcl_WideUInt) 25, - (Tcl_WideUInt) 125, - (Tcl_WideUInt) 625, - (Tcl_WideUInt) 3125, /* 5**5 */ - (Tcl_WideUInt) 3125*5, - (Tcl_WideUInt) 3125*25, - (Tcl_WideUInt) 3125*125, - (Tcl_WideUInt) 3125*625, - (Tcl_WideUInt) 3125*3125, /* 5**10 */ - (Tcl_WideUInt) 3125*3125*5, - (Tcl_WideUInt) 3125*3125*25, - (Tcl_WideUInt) 3125*3125*125, - (Tcl_WideUInt) 3125*3125*625, - (Tcl_WideUInt) 3125*3125*3125, /* 5**15 */ - (Tcl_WideUInt) 3125*3125*3125*5, - (Tcl_WideUInt) 3125*3125*3125*25, - (Tcl_WideUInt) 3125*3125*3125*125, - (Tcl_WideUInt) 3125*3125*3125*625, - (Tcl_WideUInt) 3125*3125*3125*3125, /* 5**20 */ - (Tcl_WideUInt) 3125*3125*3125*3125*5, - (Tcl_WideUInt) 3125*3125*3125*3125*25, - (Tcl_WideUInt) 3125*3125*3125*3125*125, - (Tcl_WideUInt) 3125*3125*3125*3125*625, - (Tcl_WideUInt) 3125*3125*3125*3125*3125, /* 5**25 */ - (Tcl_WideUInt) 3125*3125*3125*3125*3125*5 /* 5**26 */ +static const Tcl_WideUInt wuipow5[28] = { + (Tcl_WideUInt) 1U, /* 5**0 */ + (Tcl_WideUInt) 5U, + (Tcl_WideUInt) 25U, + (Tcl_WideUInt) 125U, + (Tcl_WideUInt) 625U, + (Tcl_WideUInt) 3125U, /* 5**5 */ + (Tcl_WideUInt) 3125U*5U, + (Tcl_WideUInt) 3125U*25U, + (Tcl_WideUInt) 3125U*125U, + (Tcl_WideUInt) 3125U*625U, + (Tcl_WideUInt) 3125U*3125U, /* 5**10 */ + (Tcl_WideUInt) 3125U*3125U*5U, + (Tcl_WideUInt) 3125U*3125U*25U, + (Tcl_WideUInt) 3125U*3125U*125U, + (Tcl_WideUInt) 3125U*3125U*625U, + (Tcl_WideUInt) 3125U*3125U*3125U, /* 5**15 */ + (Tcl_WideUInt) 3125U*3125U*3125U*5U, + (Tcl_WideUInt) 3125U*3125U*3125U*25U, + (Tcl_WideUInt) 3125U*3125U*3125U*125U, + (Tcl_WideUInt) 3125U*3125U*3125U*625U, + (Tcl_WideUInt) 3125U*3125U*3125U*3125U, /* 5**20 */ + (Tcl_WideUInt) 3125U*3125U*3125U*3125U*5U, + (Tcl_WideUInt) 3125U*3125U*3125U*3125U*25U, + (Tcl_WideUInt) 3125U*3125U*3125U*3125U*125U, + (Tcl_WideUInt) 3125U*3125U*3125U*3125U*625U, + (Tcl_WideUInt) 3125U*3125U*3125U*3125U*3125U, /* 5**25 */ + (Tcl_WideUInt) 3125U*3125U*3125U*3125U*3125U*5U, + (Tcl_WideUInt) 3125U*3125U*3125U*3125U*3125U*25U /* 5**27 */ }; /* @@ -4395,7 +4396,7 @@ TclDoubleDigits( ++m2plus; } - if (s5+1 < N_LOG2POW5 && s2+1 + log2pow5[s5+1] <= 64) { + if (s5+1 < N_LOG2POW5 && s2+1 + log2pow5[s5+1] < 64) { /* * If 10*2**s2*5**s5 == 2**(s2+1)+5**(s5+1) fits in a 64-bit word, * then all our intermediate calculations can be done using exact diff --git a/tests/expr.test b/tests/expr.test index 4fa6821..57c44ed 100644 --- a/tests/expr.test +++ b/tests/expr.test @@ -7277,6 +7277,43 @@ test expr-52.1 { ::tcl::unsupported::representation $a]] } {0 0 1 1} +# Bug e3dcab1d14 +proc do-one-test-expr-61 {e p float athreshold} { + # e - power of 2 to test + # p - tcl_precision to test wuth + # float - floating point value 2**-$p + # athreshold - tolerable absolute error (1/2 decimal digit in + # least significant place plus 1/2 least significant bit) + set trouble {} + set ::tcl_precision $p + set xfmt x[expr $float] + set ::tcl_precision 0 + set fmt [string range $xfmt 1 end] + set aerror [expr {abs($fmt - $float)}] + if {$aerror > $athreshold} { + return "Result $fmt is more than $athreshold away from $float" + } else { + return {} + } +} + +proc run-test-expr-61 {} { + for {set e 0} {$e <= 1023} {incr e} { + set pt [expr {floor($e*log(2)/log(10))}] + for {set p 6} {$p <= 17} {incr p} { + set athreshold [expr {0.5*10.0**-($pt+$p) + 2.0**-($e+53)}] + set numer [expr {5**$e}] + set xfloat x[expr {2.**-$e}] + set float [string range $xfloat 1 end] + test expr-61.$p.$e "convert 2**-$e to decimal at precision $p" { + do-one-test-expr-61 $e $p $float $athreshold + } {} + } + } + rename do-one-test-expr-61 {} + rename run-test-expr-61 {} +} +run-test-expr-61 # cleanup -- cgit v0.12 From 119519c4df904cc9914302f68b70897ad33b9db3 Mon Sep 17 00:00:00 2001 From: Kevin B Kenny Date: Sun, 22 Jan 2023 01:37:07 +0000 Subject: Remove unneeded hard-coded array size --- generic/tclStrToD.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/generic/tclStrToD.c b/generic/tclStrToD.c index 972b5fd..d5578a9 100644 --- a/generic/tclStrToD.c +++ b/generic/tclStrToD.c @@ -263,7 +263,7 @@ static const int log2pow5[27] = { }; #define N_LOG2POW5 27 -static const Tcl_WideUInt wuipow5[28] = { +static const Tcl_WideUInt wuipow5[] = { (Tcl_WideUInt) 1U, /* 5**0 */ (Tcl_WideUInt) 5U, (Tcl_WideUInt) 25U, -- cgit v0.12 From 4ef7c0c4b836759619b399102ea01f01b4a61165 Mon Sep 17 00:00:00 2001 From: Kevin B Kenny Date: Sun, 22 Jan 2023 01:48:02 +0000 Subject: Missed one more off-by-one error, also, tests misnumbered for merge forward. --- generic/tclStrToD.c | 2 +- tests/expr.test | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/generic/tclStrToD.c b/generic/tclStrToD.c index d5578a9..c55554c 100644 --- a/generic/tclStrToD.c +++ b/generic/tclStrToD.c @@ -4453,7 +4453,7 @@ TclDoubleDigits( s2 -= b2; b2 = 0; } - if (s5+1 < N_LOG2POW5 && s2+1 + log2pow5[s5+1] <= 64) { + if (s5+1 < N_LOG2POW5 && s2+1 + log2pow5[s5+1] < 64) { /* * If 10*2**s2*5**s5 == 2**(s2+1)+5**(s5+1) fits in a 64-bit word, * then all our intermediate calculations can be done using exact diff --git a/tests/expr.test b/tests/expr.test index 57c44ed..2434ab4 100644 --- a/tests/expr.test +++ b/tests/expr.test @@ -7278,7 +7278,7 @@ test expr-52.1 { } {0 0 1 1} # Bug e3dcab1d14 -proc do-one-test-expr-61 {e p float athreshold} { +proc do-one-test-expr-63 {e p float athreshold} { # e - power of 2 to test # p - tcl_precision to test wuth # float - floating point value 2**-$p @@ -7297,7 +7297,7 @@ proc do-one-test-expr-61 {e p float athreshold} { } } -proc run-test-expr-61 {} { +proc run-test-expr-63 {} { for {set e 0} {$e <= 1023} {incr e} { set pt [expr {floor($e*log(2)/log(10))}] for {set p 6} {$p <= 17} {incr p} { @@ -7305,15 +7305,15 @@ proc run-test-expr-61 {} { set numer [expr {5**$e}] set xfloat x[expr {2.**-$e}] set float [string range $xfloat 1 end] - test expr-61.$p.$e "convert 2**-$e to decimal at precision $p" { - do-one-test-expr-61 $e $p $float $athreshold + test expr-63.$p.$e "convert 2**-$e to decimal at precision $p" { + do-one-test-expr-63 $e $p $float $athreshold } {} } } - rename do-one-test-expr-61 {} - rename run-test-expr-61 {} + rename do-one-test-expr-63 {} + rename run-test-expr-63 {} } -run-test-expr-61 +run-test-expr-63 # cleanup -- cgit v0.12 From 46dcc60e4182a6b1bfae4b7bf93d03430e5e8ce6 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Mon, 23 Jan 2023 19:18:39 +0000 Subject: Better error-message for previous commit. Some more code-cleanup, backported from 8.7 --- generic/tclCmdAH.c | 84 ++++++++++----------------- generic/tclInterp.c | 107 +++++++++++++++++----------------- generic/tclNamesp.c | 162 +++++++++++++++++++++++----------------------------- 3 files changed, 153 insertions(+), 200 deletions(-) diff --git a/generic/tclCmdAH.c b/generic/tclCmdAH.c index b3269f4..0bf5b8e 100644 --- a/generic/tclCmdAH.c +++ b/generic/tclCmdAH.c @@ -46,24 +46,12 @@ struct ForeachState { static int CheckAccess(Tcl_Interp *interp, Tcl_Obj *pathPtr, int mode); -static int BadEncodingSubcommand(ClientData dummy, - Tcl_Interp *interp, int objc, - Tcl_Obj *const objv[]); -static int EncodingConvertfromObjCmd(ClientData dummy, - Tcl_Interp *interp, int objc, - Tcl_Obj *const objv[]); -static int EncodingConverttoObjCmd(ClientData dummy, - Tcl_Interp *interp, int objc, - Tcl_Obj *const objv[]); -static int EncodingDirsObjCmd(ClientData dummy, - Tcl_Interp *interp, int objc, - Tcl_Obj *const objv[]); -static int EncodingNamesObjCmd(ClientData dummy, - Tcl_Interp *interp, int objc, - Tcl_Obj *const objv[]); -static int EncodingSystemObjCmd(ClientData dummy, - Tcl_Interp *interp, int objc, - Tcl_Obj *const objv[]); +static Tcl_ObjCmdProc BadEncodingSubcommand; +static Tcl_ObjCmdProc EncodingConvertfromObjCmd; +static Tcl_ObjCmdProc EncodingConverttoObjCmd; +static Tcl_ObjCmdProc EncodingDirsObjCmd; +static Tcl_ObjCmdProc EncodingNamesObjCmd; +static Tcl_ObjCmdProc EncodingSystemObjCmd; static inline int ForeachAssignments(Tcl_Interp *interp, struct ForeachState *statePtr); static inline void ForeachCleanup(Tcl_Interp *interp, @@ -132,7 +120,6 @@ static Tcl_ObjCmdProc PathTypeCmd; *---------------------------------------------------------------------- */ - /* ARGSUSED */ int Tcl_BreakObjCmd( ClientData dummy, /* Not used. */ @@ -164,8 +151,6 @@ Tcl_BreakObjCmd( * *---------------------------------------------------------------------- */ - - /* ARGSUSED */ int Tcl_CaseObjCmd( ClientData dummy, /* Not used. */ @@ -300,7 +285,6 @@ Tcl_CaseObjCmd( *---------------------------------------------------------------------- */ - /* ARGSUSED */ int Tcl_CatchObjCmd( ClientData dummy, /* Not used. */ @@ -353,8 +337,8 @@ CatchObjCmdCallback( { Interp *iPtr = (Interp *) interp; int objc = PTR2INT(data[0]); - Tcl_Obj *varNamePtr = data[1]; - Tcl_Obj *optionVarNamePtr = data[2]; + Tcl_Obj *varNamePtr = (Tcl_Obj *)data[1]; + Tcl_Obj *optionVarNamePtr = (Tcl_Obj *)data[2]; int rewind = iPtr->execEnvPtr->rewind; /* @@ -406,7 +390,6 @@ CatchObjCmdCallback( *---------------------------------------------------------------------- */ - /* ARGSUSED */ int Tcl_CdObjCmd( ClientData dummy, /* Not used. */ @@ -462,7 +445,6 @@ Tcl_CdObjCmd( *---------------------------------------------------------------------- */ - /* ARGSUSED */ int Tcl_ConcatObjCmd( ClientData dummy, /* Not used. */ @@ -497,7 +479,6 @@ Tcl_ConcatObjCmd( *---------------------------------------------------------------------- */ - /* ARGSUSED */ int Tcl_ContinueObjCmd( ClientData dummy, /* Not used. */ @@ -834,10 +815,11 @@ EncodingDirsObjCmd( */ int -EncodingNamesObjCmd(ClientData dummy, /* Unused */ - Tcl_Interp* interp, /* Tcl interpreter */ - int objc, /* Number of command line args */ - Tcl_Obj* const objv[]) /* Vector of command line args */ +EncodingNamesObjCmd( + ClientData dummy, /* Unused */ + Tcl_Interp* interp, /* Tcl interpreter */ + int objc, /* Number of command line args */ + Tcl_Obj* const objv[]) /* Vector of command line args */ { if (objc > 1) { Tcl_WrongNumArgs(interp, 1, objv, NULL); @@ -864,10 +846,11 @@ EncodingNamesObjCmd(ClientData dummy, /* Unused */ */ int -EncodingSystemObjCmd(ClientData dummy, /* Unused */ - Tcl_Interp* interp, /* Tcl interpreter */ - int objc, /* Number of command line args */ - Tcl_Obj* const objv[]) /* Vector of command line args */ +EncodingSystemObjCmd( + ClientData dummy, /* Unused */ + Tcl_Interp* interp, /* Tcl interpreter */ + int objc, /* Number of command line args */ + Tcl_Obj* const objv[]) /* Vector of command line args */ { if (objc > 2) { Tcl_WrongNumArgs(interp, 1, objv, "?encoding?"); @@ -899,7 +882,6 @@ EncodingSystemObjCmd(ClientData dummy, /* Unused */ *---------------------------------------------------------------------- */ - /* ARGSUSED */ int Tcl_ErrorObjCmd( ClientData dummy, /* Not used. */ @@ -949,7 +931,6 @@ Tcl_ErrorObjCmd( *---------------------------------------------------------------------- */ - /* ARGSUSED */ static int EvalCmdErrMsg( ClientData data[], @@ -1032,7 +1013,6 @@ TclNREvalObjCmd( *---------------------------------------------------------------------- */ - /* ARGSUSED */ int Tcl_ExitObjCmd( ClientData dummy, /* Not used. */ @@ -1053,7 +1033,6 @@ Tcl_ExitObjCmd( return TCL_ERROR; } Tcl_Exit(value); - /*NOTREACHED*/ return TCL_OK; /* Better not ever reach this! */ } @@ -1081,7 +1060,6 @@ Tcl_ExitObjCmd( *---------------------------------------------------------------------- */ - /* ARGSUSED */ int Tcl_ExprObjCmd( ClientData dummy, /* Not used. */ @@ -1125,8 +1103,8 @@ ExprCallback( Tcl_Interp *interp, int result) { - Tcl_Obj *resultPtr = data[0]; - Tcl_Obj *objPtr = data[1]; + Tcl_Obj *resultPtr = (Tcl_Obj *)data[0]; + Tcl_Obj *objPtr = (Tcl_Obj *)data[1]; if (objPtr != NULL) { Tcl_DecrRefCount(objPtr); @@ -2319,7 +2297,7 @@ FilesystemSeparatorCmd( return TCL_ERROR; } if (objc == 1) { - const char *separator = NULL; /* lint */ + const char *separator = NULL; switch (tclPlatform) { case TCL_PLATFORM_UNIX: @@ -2622,7 +2600,6 @@ GetTypeFromMode( *---------------------------------------------------------------------- */ - /* ARGSUSED */ int Tcl_ForObjCmd( ClientData dummy, /* Not used. */ @@ -2670,7 +2647,7 @@ ForSetupCallback( Tcl_Interp *interp, int result) { - ForIterData *iterPtr = data[0]; + ForIterData *iterPtr = (ForIterData *)data[0]; if (result != TCL_OK) { if (result == TCL_ERROR) { @@ -2689,7 +2666,7 @@ TclNRForIterCallback( Tcl_Interp *interp, int result) { - ForIterData *iterPtr = data[0]; + ForIterData *iterPtr = (ForIterData *)data[0]; Tcl_Obj *boolObj; switch (result) { @@ -2725,8 +2702,8 @@ ForCondCallback( int result) { Interp *iPtr = (Interp *) interp; - ForIterData *iterPtr = data[0]; - Tcl_Obj *boolObj = data[1]; + ForIterData *iterPtr = (ForIterData *)data[0]; + Tcl_Obj *boolObj = (Tcl_Obj *)data[1]; int value; if (result != TCL_OK) { @@ -2763,7 +2740,7 @@ ForNextCallback( int result) { Interp *iPtr = (Interp *) interp; - ForIterData *iterPtr = data[0]; + ForIterData *iterPtr = (ForIterData *)data[0]; Tcl_Obj *next = iterPtr->next; if ((result == TCL_OK) || (result == TCL_CONTINUE)) { @@ -2787,7 +2764,7 @@ ForPostNextCallback( Tcl_Interp *interp, int result) { - ForIterData *iterPtr = data[0]; + ForIterData *iterPtr = (ForIterData *)data[0]; if ((result != TCL_BREAK) && (result != TCL_OK)) { if (result == TCL_ERROR) { @@ -2817,7 +2794,6 @@ ForPostNextCallback( *---------------------------------------------------------------------- */ - /* ARGSUSED */ int Tcl_ForeachObjCmd( ClientData dummy, /* Not used. */ @@ -2892,7 +2868,7 @@ EachloopCmd( * allocation for better performance. */ - statePtr = TclStackAlloc(interp, + statePtr = (struct ForeachState *)TclStackAlloc(interp, sizeof(struct ForeachState) + 3 * numLists * sizeof(int) + 2 * numLists * (sizeof(Tcl_Obj **) + sizeof(Tcl_Obj *))); memset(statePtr, 0, @@ -2993,7 +2969,7 @@ ForeachLoopStep( Tcl_Interp *interp, int result) { - struct ForeachState *statePtr = data[0]; + struct ForeachState *statePtr = (struct ForeachState *)data[0]; /* * Process the result code from this run of the [foreach] body. Note that @@ -3070,7 +3046,6 @@ ForeachAssignments( for (i=0 ; inumLists ; i++) { for (v=0 ; vvarcList[i] ; v++) { k = statePtr->index[i]++; - if (k < statePtr->argcList[i]) { valuePtr = statePtr->argvList[i][k]; } else { @@ -3135,7 +3110,6 @@ ForeachCleanup( *---------------------------------------------------------------------- */ - /* ARGSUSED */ int Tcl_FormatObjCmd( ClientData dummy, /* Not used. */ diff --git a/generic/tclInterp.c b/generic/tclInterp.c index 613a86a..2633a18 100644 --- a/generic/tclInterp.c +++ b/generic/tclInterp.c @@ -225,15 +225,12 @@ static int AliasList(Tcl_Interp *interp, Tcl_Interp *childInterp); static int AliasObjCmd(ClientData dummy, Tcl_Interp *currentInterp, int objc, Tcl_Obj *const objv[]); -static int AliasNRCmd(ClientData dummy, - Tcl_Interp *currentInterp, int objc, - Tcl_Obj *const objv[]); -static void AliasObjCmdDeleteProc(ClientData clientData); +static Tcl_ObjCmdProc AliasNRCmd; +static Tcl_CmdDeleteProc AliasObjCmdDeleteProc; static Tcl_Interp * GetInterp(Tcl_Interp *interp, Tcl_Obj *pathPtr); static Tcl_Interp * GetInterp2(Tcl_Interp *interp, int objc, Tcl_Obj *const objv[]); -static void InterpInfoDeleteProc(ClientData clientData, - Tcl_Interp *interp); +static Tcl_InterpDeleteProc InterpInfoDeleteProc; static int ChildBgerror(Tcl_Interp *interp, Tcl_Interp *childInterp, int objc, Tcl_Obj *const objv[]); @@ -259,7 +256,7 @@ static int ChildMarkTrusted(Tcl_Interp *interp, Tcl_Interp *childInterp); static int ChildObjCmd(ClientData dummy, Tcl_Interp *interp, int objc, Tcl_Obj *const objv[]); -static void ChildObjCmdDeleteProc(ClientData clientData); +static Tcl_CmdDeleteProc ChildObjCmdDeleteProc; static int ChildRecursionLimit(Tcl_Interp *interp, Tcl_Interp *childInterp, int objc, Tcl_Obj *const objv[]); @@ -309,7 +306,7 @@ TclSetPreInitScript( { const char *prevString = tclPreInitScript; tclPreInitScript = string; - return(prevString); + return prevString; } /* @@ -473,7 +470,7 @@ TclInterpInit( Parent *parentPtr; Child *childPtr; - interpInfoPtr = ckalloc(sizeof(InterpInfo)); + interpInfoPtr = (InterpInfo *)ckalloc(sizeof(InterpInfo)); ((Interp *) interp)->interpInfo = interpInfoPtr; parentPtr = &interpInfoPtr->parent; @@ -589,7 +586,7 @@ InterpInfoDeleteProc( * *---------------------------------------------------------------------- */ - /* ARGSUSED */ + int Tcl_InterpObjCmd( ClientData clientData, /* Unused. */ @@ -618,7 +615,7 @@ NRInterpCmd( "slaves", "share", "target", "transfer", NULL }; - enum option { + enum interpOptionEnum { OPT_ALIAS, OPT_ALIASES, OPT_BGERROR, OPT_CANCEL, OPT_CHILDREN, OPT_CREATE, OPT_DEBUG, OPT_DELETE, OPT_EVAL, OPT_EXISTS, OPT_EXPOSE, @@ -635,7 +632,7 @@ NRInterpCmd( &index) != TCL_OK) { return TCL_ERROR; } - switch ((enum option) index) { + switch ((enum interpOptionEnum)index) { case OPT_ALIAS: { Tcl_Interp *parentInterp; @@ -688,7 +685,7 @@ NRInterpCmd( static const char *const cancelOptions[] = { "-unwind", "--", NULL }; - enum option { + enum optionCancelEnum { OPT_UNWIND, OPT_LAST }; @@ -703,7 +700,7 @@ NRInterpCmd( return TCL_ERROR; } - switch ((enum option) index) { + switch ((enum optionCancelEnum) index) { case OPT_UNWIND: /* * The evaluation stack in the target interp is to be unwound. @@ -1024,7 +1021,7 @@ NRInterpCmd( TclNewObj(resultPtr); hPtr = Tcl_FirstHashEntry(&iiPtr->parent.childTable, &hashSearch); for ( ; hPtr != NULL; hPtr = Tcl_NextHashEntry(&hashSearch)) { - string = Tcl_GetHashKey(&iiPtr->parent.childTable, hPtr); + string = (char *)Tcl_GetHashKey(&iiPtr->parent.childTable, hPtr); Tcl_ListObjAppendElement(NULL, resultPtr, Tcl_NewStringObj(string, -1)); } @@ -1095,7 +1092,7 @@ NRInterpCmd( NULL); return TCL_ERROR; } - aliasPtr = Tcl_GetHashValue(hPtr); + aliasPtr = (Alias *)Tcl_GetHashValue(hPtr); if (Tcl_GetInterpPath(interp, aliasPtr->targetInterp) != TCL_OK) { Tcl_SetObjResult(interp, Tcl_ObjPrintf( "target interpreter for alias \"%s\" in path \"%s\" is " @@ -1178,7 +1175,7 @@ Tcl_CreateAlias( int i; int result; - objv = TclStackAlloc(childInterp, (unsigned) sizeof(Tcl_Obj *) * argc); + objv = (Tcl_Obj **)TclStackAlloc(childInterp, sizeof(Tcl_Obj *) * argc); for (i = 0; i < argc; i++) { objv[i] = Tcl_NewStringObj(argv[i], -1); Tcl_IncrRefCount(objv[i]); @@ -1284,7 +1281,7 @@ Tcl_GetAlias( Tcl_SetErrorCode(interp, "TCL", "LOOKUP", "ALIAS", aliasName, NULL); return TCL_ERROR; } - aliasPtr = Tcl_GetHashValue(hPtr); + aliasPtr = (Alias *)Tcl_GetHashValue(hPtr); objc = aliasPtr->objc; objv = &aliasPtr->objPtr; @@ -1346,7 +1343,7 @@ Tcl_GetAliasObj( Tcl_SetErrorCode(interp, "TCL", "LOOKUP", "ALIAS", aliasName, NULL); return TCL_ERROR; } - aliasPtr = Tcl_GetHashValue(hPtr); + aliasPtr = (Alias *)Tcl_GetHashValue(hPtr); objc = aliasPtr->objc; objv = &aliasPtr->objPtr; @@ -1414,7 +1411,7 @@ TclPreventAliasLoop( * chain then we have a loop. */ - aliasPtr = cmdPtr->objClientData; + aliasPtr = (Alias *)cmdPtr->objClientData; nextAliasPtr = aliasPtr; while (1) { Tcl_Obj *cmdNamePtr; @@ -1462,10 +1459,8 @@ TclPreventAliasLoop( if (aliasCmdPtr->objProc != AliasObjCmd) { return TCL_OK; } - nextAliasPtr = aliasCmdPtr->objClientData; + nextAliasPtr = (Alias *)aliasCmdPtr->objClientData; } - - /* NOTREACHED */ } /* @@ -1505,7 +1500,7 @@ AliasCreate( Tcl_Obj **prefv; int isNew, i; - aliasPtr = ckalloc(sizeof(Alias) + objc * sizeof(Tcl_Obj *)); + aliasPtr = (Alias *)ckalloc(sizeof(Alias) + objc * sizeof(Tcl_Obj *)); aliasPtr->token = namePtr; Tcl_IncrRefCount(aliasPtr->token); aliasPtr->targetInterp = parentInterp; @@ -1613,7 +1608,7 @@ AliasCreate( * interp alias {} foo {} zop # Now recreate "foo"... */ - targetPtr = ckalloc(sizeof(Target)); + targetPtr = (Target *)ckalloc(sizeof(Target)); targetPtr->childCmd = aliasPtr->childCmd; targetPtr->childInterp = childInterp; @@ -1674,7 +1669,7 @@ AliasDelete( TclGetString(namePtr), NULL); return TCL_ERROR; } - aliasPtr = Tcl_GetHashValue(hPtr); + aliasPtr = (Alias *)Tcl_GetHashValue(hPtr); Tcl_DeleteCommandFromToken(childInterp, aliasPtr->childCmd); return TCL_OK; } @@ -1719,7 +1714,7 @@ AliasDescribe( if (hPtr == NULL) { return TCL_OK; } - aliasPtr = Tcl_GetHashValue(hPtr); + aliasPtr = (Alias *)Tcl_GetHashValue(hPtr); prefixPtr = Tcl_NewListObj(aliasPtr->objc, &aliasPtr->objPtr); Tcl_SetObjResult(interp, prefixPtr); return TCL_OK; @@ -1757,7 +1752,7 @@ AliasList( entryPtr = Tcl_FirstHashEntry(&childPtr->aliasTable, &hashSearch); for ( ; entryPtr != NULL; entryPtr = Tcl_NextHashEntry(&hashSearch)) { - aliasPtr = Tcl_GetHashValue(entryPtr); + aliasPtr = (Alias *)Tcl_GetHashValue(entryPtr); Tcl_ListObjAppendElement(NULL, resultPtr, aliasPtr->token); } Tcl_SetObjResult(interp, resultPtr); @@ -1793,7 +1788,7 @@ AliasNRCmd( int objc, /* Number of arguments. */ Tcl_Obj *const objv[]) /* Argument vector. */ { - Alias *aliasPtr = clientData; + Alias *aliasPtr = (Alias *)clientData; int prefc, cmdc, i; Tcl_Obj **prefv, **cmdv; Tcl_Obj *listPtr; @@ -1842,7 +1837,7 @@ AliasObjCmd( Tcl_Obj *const objv[]) /* Argument vector. */ { #define ALIAS_CMDV_PREALLOC 10 - Alias *aliasPtr = clientData; + Alias *aliasPtr = (Alias *)clientData; Tcl_Interp *targetInterp = aliasPtr->targetInterp; int result, prefc, cmdc, i; Tcl_Obj **prefv, **cmdv; @@ -1861,7 +1856,7 @@ AliasObjCmd( if (cmdc <= ALIAS_CMDV_PREALLOC) { cmdv = cmdArr; } else { - cmdv = TclStackAlloc(interp, cmdc * sizeof(Tcl_Obj *)); + cmdv = (Tcl_Obj **)TclStackAlloc(interp, cmdc * sizeof(Tcl_Obj *)); } memcpy(cmdv, prefv, prefc * sizeof(Tcl_Obj *)); @@ -1947,7 +1942,7 @@ static void AliasObjCmdDeleteProc( ClientData clientData) /* The alias record for this alias. */ { - Alias *aliasPtr = clientData; + Alias *aliasPtr = (Alias *)clientData; Target *targetPtr; int i; Tcl_Obj **objv; @@ -2123,7 +2118,7 @@ TclSetChildCancelFlags( hPtr = Tcl_FirstHashEntry(&parentPtr->childTable, &hashSearch); for ( ; hPtr != NULL; hPtr = Tcl_NextHashEntry(&hashSearch)) { - childPtr = Tcl_GetHashValue(hPtr); + childPtr = (Child *)Tcl_GetHashValue(hPtr); iPtr = (Interp *) childPtr->childInterp; if (iPtr == NULL) { @@ -2188,7 +2183,7 @@ Tcl_GetInterpPath( return TCL_ERROR; } Tcl_ListObjAppendElement(NULL, Tcl_GetObjResult(interp), - Tcl_NewStringObj(Tcl_GetHashKey(&iiPtr->parent.childTable, + Tcl_NewStringObj((const char *)Tcl_GetHashKey(&iiPtr->parent.childTable, iiPtr->child.childEntryPtr), -1)); return TCL_OK; } @@ -2236,7 +2231,7 @@ GetInterp( searchInterp = NULL; break; } - childPtr = Tcl_GetHashValue(hPtr); + childPtr = (Child *)Tcl_GetHashValue(hPtr); searchInterp = childPtr->childInterp; if (searchInterp == NULL) { break; @@ -2462,7 +2457,7 @@ NRChildCmd( int objc, /* Number of arguments. */ Tcl_Obj *const objv[]) /* Argument objects. */ { - Tcl_Interp *childInterp = clientData; + Tcl_Interp *childInterp = (Tcl_Interp *)clientData; int index; static const char *const options[] = { "alias", "aliases", "bgerror", "debug", @@ -2470,7 +2465,7 @@ NRChildCmd( "issafe", "invokehidden", "limit", "marktrusted", "recursionlimit", NULL }; - enum options { + enum childCmdOptionsEnum { OPT_ALIAS, OPT_ALIASES, OPT_BGERROR, OPT_DEBUG, OPT_EVAL, OPT_EXPOSE, OPT_HIDE, OPT_HIDDEN, OPT_ISSAFE, OPT_INVOKEHIDDEN, OPT_LIMIT, OPT_MARKTRUSTED, @@ -2490,7 +2485,7 @@ NRChildCmd( return TCL_ERROR; } - switch ((enum options) index) { + switch ((enum childCmdOptionsEnum) index) { case OPT_ALIAS: if (objc > 2) { if (objc == 3) { @@ -2666,7 +2661,7 @@ ChildObjCmdDeleteProc( ClientData clientData) /* The ChildRecord for the command. */ { Child *childPtr; /* Interim storage for Child record. */ - Tcl_Interp *childInterp = clientData; + Tcl_Interp *childInterp = (Tcl_Interp *)clientData; /* And for a child interp. */ childPtr = &((InterpInfo *) ((Interp *) childInterp)->interpInfo)->child; @@ -2995,7 +2990,7 @@ ChildHidden( Tcl_Interp *interp, /* Interp for data return. */ Tcl_Interp *childInterp) /* Interp whose hidden commands to query. */ { - Tcl_Obj *listObjPtr; /* Local object pointer. */ + Tcl_Obj *listObjPtr; /* Local object pointer. */ Tcl_HashTable *hTblPtr; /* For local searches. */ Tcl_HashEntry *hPtr; /* For local searches. */ Tcl_HashSearch hSearch; /* For local searches. */ @@ -3007,7 +3002,7 @@ ChildHidden( hPtr != NULL; hPtr = Tcl_NextHashEntry(&hSearch)) { Tcl_ListObjAppendElement(NULL, listObjPtr, - Tcl_NewStringObj(Tcl_GetHashKey(hTblPtr, hPtr), -1)); + Tcl_NewStringObj((const char *)Tcl_GetHashKey(hTblPtr, hPtr), -1)); } } Tcl_SetObjResult(interp, listObjPtr); @@ -3530,7 +3525,7 @@ Tcl_LimitAddHandler( * Allocate a handler record. */ - handlerPtr = ckalloc(sizeof(LimitHandler)); + handlerPtr = (LimitHandler *)ckalloc(sizeof(LimitHandler)); handlerPtr->flags = 0; handlerPtr->handlerProc = handlerProc; handlerPtr->clientData = clientData; @@ -3987,8 +3982,8 @@ static void TimeLimitCallback( ClientData clientData) { - Tcl_Interp *interp = clientData; - Interp *iPtr = clientData; + Tcl_Interp *interp = (Tcl_Interp *)clientData; + Interp *iPtr = (Interp *)clientData; int code; Tcl_Preserve(interp); @@ -4131,7 +4126,7 @@ static void DeleteScriptLimitCallback( ClientData clientData) { - ScriptLimitCallback *limitCBPtr = clientData; + ScriptLimitCallback *limitCBPtr = (ScriptLimitCallback *)clientData; Tcl_DecrRefCount(limitCBPtr->scriptObj); if (limitCBPtr->entryPtr != NULL) { @@ -4163,7 +4158,7 @@ CallScriptLimitCallback( ClientData clientData, Tcl_Interp *interp) /* Interpreter which failed the limit */ { - ScriptLimitCallback *limitCBPtr = clientData; + ScriptLimitCallback *limitCBPtr = (ScriptLimitCallback *)clientData; int code; if (Tcl_InterpDeleted(limitCBPtr->interp)) { @@ -4231,13 +4226,13 @@ SetScriptLimitCallback( hashPtr = Tcl_CreateHashEntry(&iPtr->limit.callbacks, &key, &isNew); if (!isNew) { - limitCBPtr = Tcl_GetHashValue(hashPtr); + limitCBPtr = (ScriptLimitCallback *)Tcl_GetHashValue(hashPtr); limitCBPtr->entryPtr = NULL; Tcl_LimitRemoveHandler(targetInterp, type, CallScriptLimitCallback, limitCBPtr); } - limitCBPtr = ckalloc(sizeof(ScriptLimitCallback)); + limitCBPtr = (ScriptLimitCallback *)ckalloc(sizeof(ScriptLimitCallback)); limitCBPtr->interp = interp; limitCBPtr->scriptObj = scriptObj; limitCBPtr->entryPtr = hashPtr; @@ -4426,7 +4421,7 @@ ChildCommandLimitCmd( key.type = TCL_LIMIT_COMMANDS; hPtr = Tcl_FindHashEntry(&iPtr->limit.callbacks, (char *) &key); if (hPtr != NULL) { - limitCBPtr = Tcl_GetHashValue(hPtr); + limitCBPtr = (ScriptLimitCallback *)Tcl_GetHashValue(hPtr); if (limitCBPtr != NULL && limitCBPtr->scriptObj != NULL) { Tcl_DictObjPut(NULL, dictPtr, Tcl_NewStringObj(options[0], -1), limitCBPtr->scriptObj); @@ -4468,7 +4463,7 @@ ChildCommandLimitCmd( key.type = TCL_LIMIT_COMMANDS; hPtr = Tcl_FindHashEntry(&iPtr->limit.callbacks, (char *) &key); if (hPtr != NULL) { - limitCBPtr = Tcl_GetHashValue(hPtr); + limitCBPtr = (ScriptLimitCallback *)Tcl_GetHashValue(hPtr); if (limitCBPtr != NULL && limitCBPtr->scriptObj != NULL) { Tcl_SetObjResult(interp, limitCBPtr->scriptObj); } @@ -4502,7 +4497,7 @@ ChildCommandLimitCmd( switch ((enum Options) index) { case OPT_CMD: scriptObj = objv[i+1]; - (void) Tcl_GetStringFromObj(objv[i+1], &scriptLen); + (void) Tcl_GetStringFromObj(scriptObj, &scriptLen); break; case OPT_GRAN: granObj = objv[i+1]; @@ -4614,7 +4609,7 @@ ChildTimeLimitCmd( key.type = TCL_LIMIT_TIME; hPtr = Tcl_FindHashEntry(&iPtr->limit.callbacks, (char *) &key); if (hPtr != NULL) { - limitCBPtr = Tcl_GetHashValue(hPtr); + limitCBPtr = (ScriptLimitCallback *)Tcl_GetHashValue(hPtr); if (limitCBPtr != NULL && limitCBPtr->scriptObj != NULL) { Tcl_DictObjPut(NULL, dictPtr, Tcl_NewStringObj(options[0], -1), limitCBPtr->scriptObj); @@ -4662,7 +4657,7 @@ ChildTimeLimitCmd( key.type = TCL_LIMIT_TIME; hPtr = Tcl_FindHashEntry(&iPtr->limit.callbacks, (char *) &key); if (hPtr != NULL) { - limitCBPtr = Tcl_GetHashValue(hPtr); + limitCBPtr = (ScriptLimitCallback *)Tcl_GetHashValue(hPtr); if (limitCBPtr != NULL && limitCBPtr->scriptObj != NULL) { Tcl_SetObjResult(interp, limitCBPtr->scriptObj); } @@ -4754,9 +4749,15 @@ ChildTimeLimitCmd( if (TclGetWideIntFromObj(interp, objv[i+1], &sec) != TCL_OK) { return TCL_ERROR; } + if (sec > LONG_MAX) { + Tcl_SetObjResult(interp, Tcl_ObjPrintf( + "seconds must be between 0 and %ld", LONG_MAX)); + goto badValue; + } if (sec < 0) { Tcl_SetObjResult(interp, Tcl_NewStringObj( "seconds must be at least 0", -1)); + badValue: Tcl_SetErrorCode(interp, "TCL", "OPERATION", "INTERP", "BADVALUE", NULL); return TCL_ERROR; diff --git a/generic/tclNamesp.c b/generic/tclNamesp.c index eccca78..7290bd1 100644 --- a/generic/tclNamesp.c +++ b/generic/tclNamesp.c @@ -31,7 +31,7 @@ * limited to a single interpreter. */ -typedef struct ThreadSpecificData { +typedef struct { long numNsCreated; /* Count of the number of namespaces created * within the thread. This value is used as a * unique id for each namespace. Cannot be @@ -89,51 +89,30 @@ static char * EstablishErrorInfoTraces(ClientData clientData, static void FreeNsNameInternalRep(Tcl_Obj *objPtr); static int GetNamespaceFromObj(Tcl_Interp *interp, Tcl_Obj *objPtr, Tcl_Namespace **nsPtrPtr); -static int InvokeImportedCmd(ClientData clientData, - Tcl_Interp *interp,int objc,Tcl_Obj *const objv[]); static int InvokeImportedNRCmd(ClientData clientData, Tcl_Interp *interp,int objc,Tcl_Obj *const objv[]); -static int NamespaceChildrenCmd(ClientData dummy, - Tcl_Interp *interp,int objc,Tcl_Obj *const objv[]); -static int NamespaceCodeCmd(ClientData dummy, Tcl_Interp *interp, - int objc, Tcl_Obj *const objv[]); -static int NamespaceCurrentCmd(ClientData dummy, - Tcl_Interp *interp,int objc,Tcl_Obj *const objv[]); -static int NamespaceDeleteCmd(ClientData dummy,Tcl_Interp *interp, - int objc, Tcl_Obj *const objv[]); -static int NamespaceEvalCmd(ClientData dummy, Tcl_Interp *interp, - int objc, Tcl_Obj *const objv[]); -static int NRNamespaceEvalCmd(ClientData dummy, - Tcl_Interp *interp,int objc,Tcl_Obj *const objv[]); -static int NamespaceExistsCmd(ClientData dummy,Tcl_Interp *interp, - int objc, Tcl_Obj *const objv[]); -static int NamespaceExportCmd(ClientData dummy,Tcl_Interp *interp, - int objc, Tcl_Obj *const objv[]); -static int NamespaceForgetCmd(ClientData dummy,Tcl_Interp *interp, - int objc, Tcl_Obj *const objv[]); +static Tcl_ObjCmdProc InvokeImportedCmd; +static Tcl_ObjCmdProc NamespaceChildrenCmd; +static Tcl_ObjCmdProc NamespaceCodeCmd; +static Tcl_ObjCmdProc NamespaceCurrentCmd; +static Tcl_ObjCmdProc NamespaceDeleteCmd; +static Tcl_ObjCmdProc NamespaceEvalCmd; +static Tcl_ObjCmdProc NRNamespaceEvalCmd; +static Tcl_ObjCmdProc NamespaceExistsCmd; +static Tcl_ObjCmdProc NamespaceExportCmd; +static Tcl_ObjCmdProc NamespaceForgetCmd; static void NamespaceFree(Namespace *nsPtr); -static int NamespaceImportCmd(ClientData dummy,Tcl_Interp *interp, - int objc, Tcl_Obj *const objv[]); -static int NamespaceInscopeCmd(ClientData dummy, - Tcl_Interp *interp,int objc,Tcl_Obj *const objv[]); -static int NRNamespaceInscopeCmd(ClientData dummy, - Tcl_Interp *interp,int objc,Tcl_Obj *const objv[]); -static int NamespaceOriginCmd(ClientData dummy,Tcl_Interp *interp, - int objc, Tcl_Obj *const objv[]); -static int NamespaceParentCmd(ClientData dummy,Tcl_Interp *interp, - int objc, Tcl_Obj *const objv[]); -static int NamespacePathCmd(ClientData dummy, Tcl_Interp *interp, - int objc, Tcl_Obj *const objv[]); -static int NamespaceQualifiersCmd(ClientData dummy, - Tcl_Interp *interp,int objc,Tcl_Obj *const objv[]); -static int NamespaceTailCmd(ClientData dummy, Tcl_Interp *interp, - int objc, Tcl_Obj *const objv[]); -static int NamespaceUpvarCmd(ClientData dummy, Tcl_Interp *interp, - int objc, Tcl_Obj *const objv[]); -static int NamespaceUnknownCmd(ClientData dummy, - Tcl_Interp *interp,int objc,Tcl_Obj *const objv[]); -static int NamespaceWhichCmd(ClientData dummy, Tcl_Interp *interp, - int objc, Tcl_Obj *const objv[]); +static Tcl_ObjCmdProc NamespaceImportCmd; +static Tcl_ObjCmdProc NamespaceInscopeCmd; +static Tcl_ObjCmdProc NRNamespaceInscopeCmd; +static Tcl_ObjCmdProc NamespaceOriginCmd; +static Tcl_ObjCmdProc NamespaceParentCmd; +static Tcl_ObjCmdProc NamespacePathCmd; +static Tcl_ObjCmdProc NamespaceQualifiersCmd; +static Tcl_ObjCmdProc NamespaceTailCmd; +static Tcl_ObjCmdProc NamespaceUpvarCmd; +static Tcl_ObjCmdProc NamespaceUnknownCmd; +static Tcl_ObjCmdProc NamespaceWhichCmd; static int SetNsNameFromAny(Tcl_Interp *interp, Tcl_Obj *objPtr); static void UnlinkNsPath(Namespace *nsPtr); @@ -320,7 +299,6 @@ Tcl_PushCallFrame( if (nsPtr->flags & NS_DEAD) { Tcl_Panic("Trying to push call frame for dead namespace"); - /*NOTREACHED*/ } } @@ -465,7 +443,7 @@ TclPushStackFrame( * treated as references to namespace * variables. */ { - *framePtrPtr = TclStackAlloc(interp, sizeof(CallFrame)); + *framePtrPtr = (Tcl_CallFrame *)TclStackAlloc(interp, sizeof(CallFrame)); return Tcl_PushCallFrame(interp, *framePtrPtr, namespacePtr, isProcCallFrame); } @@ -770,9 +748,9 @@ Tcl_CreateNamespace( */ doCreate: - nsPtr = ckalloc(sizeof(Namespace)); + nsPtr = (Namespace *)ckalloc(sizeof(Namespace)); nameLen = strlen(simpleName) + 1; - nsPtr->name = ckalloc(nameLen); + nsPtr->name = (char *)ckalloc(nameLen); memcpy(nsPtr->name, simpleName, nameLen); nsPtr->fullName = NULL; /* Set below. */ nsPtr->clientData = clientData; @@ -860,7 +838,7 @@ Tcl_CreateNamespace( name = Tcl_DStringValue(namePtr); nameLen = Tcl_DStringLength(namePtr); - nsPtr->fullName = ckalloc(nameLen + 1); + nsPtr->fullName = (char *)ckalloc(nameLen + 1); memcpy(nsPtr->fullName, name, nameLen + 1); Tcl_DStringFree(&buffer1); @@ -952,7 +930,7 @@ Tcl_DeleteNamespace( for (entryPtr = Tcl_FirstHashEntry(&nsPtr->cmdTable, &search); entryPtr != NULL;) { - cmdPtr = Tcl_GetHashValue(entryPtr); + cmdPtr = (Command *)Tcl_GetHashValue(entryPtr); if (cmdPtr->nreProc == TclNRInterpCoroutine) { Tcl_DeleteCommandFromToken((Tcl_Interp *) iPtr, (Tcl_Command) cmdPtr); @@ -1131,14 +1109,14 @@ TclTeardownNamespace( while (nsPtr->cmdTable.numEntries > 0) { int length = nsPtr->cmdTable.numEntries; - Command **cmds = TclStackAlloc((Tcl_Interp *) iPtr, + Command **cmds = (Command **)TclStackAlloc((Tcl_Interp *) iPtr, sizeof(Command *) * length); i = 0; for (entryPtr = Tcl_FirstHashEntry(&nsPtr->cmdTable, &search); entryPtr != NULL; entryPtr = Tcl_NextHashEntry(&search)) { - cmds[i] = Tcl_GetHashValue(entryPtr); + cmds[i] = (Command *)Tcl_GetHashValue(entryPtr); cmds[i]->refCount++; i++; } @@ -1445,7 +1423,7 @@ Tcl_Export( if (neededElems > nsPtr->maxExportPatterns) { nsPtr->maxExportPatterns = nsPtr->maxExportPatterns ? 2 * nsPtr->maxExportPatterns : INIT_EXPORT_PATTERNS; - nsPtr->exportArrayPtr = ckrealloc(nsPtr->exportArrayPtr, + nsPtr->exportArrayPtr = (char **)ckrealloc(nsPtr->exportArrayPtr, sizeof(char *) * nsPtr->maxExportPatterns); } @@ -1454,7 +1432,7 @@ Tcl_Export( */ len = strlen(pattern); - patternCpy = ckalloc(len + 1); + patternCpy = (char *)ckalloc(len + 1); memcpy(patternCpy, pattern, len + 1); nsPtr->exportArrayPtr[nsPtr->numExportPatterns] = patternCpy; @@ -1665,7 +1643,7 @@ Tcl_Import( } for (hPtr = Tcl_FirstHashEntry(&importNsPtr->cmdTable, &search); (hPtr != NULL); hPtr = Tcl_NextHashEntry(&search)) { - char *cmdName = Tcl_GetHashKey(&importNsPtr->cmdTable, hPtr); + char *cmdName = (char *)Tcl_GetHashKey(&importNsPtr->cmdTable, hPtr); if (Tcl_StringMatch(cmdName, simplePattern) && DoImport(interp, nsPtr, hPtr, cmdName, pattern, importNsPtr, @@ -1752,13 +1730,13 @@ DoImport( * namespace would create a cycle of imported command references. */ - cmdPtr = Tcl_GetHashValue(hPtr); + cmdPtr = (Command *)Tcl_GetHashValue(hPtr); if (found != NULL && cmdPtr->deleteProc == DeleteImportedCmd) { - Command *overwrite = Tcl_GetHashValue(found); + Command *overwrite = (Command *)Tcl_GetHashValue(found); Command *linkCmd = cmdPtr; while (linkCmd->deleteProc == DeleteImportedCmd) { - dataPtr = linkCmd->objClientData; + dataPtr = (ImportedCmdData *)linkCmd->objClientData; linkCmd = dataPtr->realCmdPtr; if (overwrite == linkCmd) { Tcl_SetObjResult(interp, Tcl_ObjPrintf( @@ -1772,7 +1750,7 @@ DoImport( } } - dataPtr = ckalloc(sizeof(ImportedCmdData)); + dataPtr = (ImportedCmdData *)ckalloc(sizeof(ImportedCmdData)); importedCmd = Tcl_NRCreateCommand(interp, Tcl_DStringValue(&ds), InvokeImportedCmd, InvokeImportedNRCmd, dataPtr, DeleteImportedCmd); @@ -1786,15 +1764,15 @@ DoImport( * and add it to the import ref list in the "real" command. */ - refPtr = ckalloc(sizeof(ImportRef)); + refPtr = (ImportRef *)ckalloc(sizeof(ImportRef)); refPtr->importedCmdPtr = (Command *) importedCmd; refPtr->nextPtr = cmdPtr->importRefPtr; cmdPtr->importRefPtr = refPtr; } else { - Command *overwrite = Tcl_GetHashValue(found); + Command *overwrite = (Command *)Tcl_GetHashValue(found); if (overwrite->deleteProc == DeleteImportedCmd) { - ImportedCmdData *dataPtr = overwrite->objClientData; + ImportedCmdData *dataPtr = (ImportedCmdData *)overwrite->objClientData; if (dataPtr->realCmdPtr == Tcl_GetHashValue(hPtr)) { /* @@ -1888,7 +1866,7 @@ Tcl_ForgetImport( if (TclMatchIsTrivial(simplePattern)) { hPtr = Tcl_FindHashEntry(&nsPtr->cmdTable, simplePattern); if (hPtr != NULL) { - Command *cmdPtr = Tcl_GetHashValue(hPtr); + Command *cmdPtr = (Command *)Tcl_GetHashValue(hPtr); if (cmdPtr && (cmdPtr->deleteProc == DeleteImportedCmd)) { Tcl_DeleteCommandFromToken(interp, (Tcl_Command) cmdPtr); @@ -1898,12 +1876,12 @@ Tcl_ForgetImport( } for (hPtr = Tcl_FirstHashEntry(&nsPtr->cmdTable, &search); (hPtr != NULL); hPtr = Tcl_NextHashEntry(&search)) { - Command *cmdPtr = Tcl_GetHashValue(hPtr); + Command *cmdPtr = (Command *)Tcl_GetHashValue(hPtr); if (cmdPtr->deleteProc != DeleteImportedCmd) { continue; } - cmdName = Tcl_GetHashKey(&nsPtr->cmdTable, hPtr); + cmdName = (char *)Tcl_GetHashKey(&nsPtr->cmdTable, hPtr); if (Tcl_StringMatch(cmdName, simplePattern)) { Tcl_DeleteCommandFromToken(interp, (Tcl_Command) cmdPtr); } @@ -1918,7 +1896,7 @@ Tcl_ForgetImport( for (hPtr = Tcl_FirstHashEntry(&nsPtr->cmdTable, &search); (hPtr != NULL); hPtr = Tcl_NextHashEntry(&search)) { Tcl_CmdInfo info; - Tcl_Command token = Tcl_GetHashValue(hPtr); + Tcl_Command token = (Tcl_Command)Tcl_GetHashValue(hPtr); Tcl_Command origin = TclGetOriginalCommand(token); if (Tcl_GetCommandInfoFromToken(origin, &info) == 0) { @@ -1931,7 +1909,7 @@ Tcl_ForgetImport( */ Command *cmdPtr = (Command *) token; - ImportedCmdData *dataPtr = cmdPtr->objClientData; + ImportedCmdData *dataPtr = (ImportedCmdData *)cmdPtr->objClientData; Tcl_Command firstToken = (Tcl_Command) dataPtr->realCmdPtr; if (firstToken == origin) { @@ -1986,7 +1964,7 @@ TclGetOriginalCommand( } while (cmdPtr->deleteProc == DeleteImportedCmd) { - dataPtr = cmdPtr->objClientData; + dataPtr = (ImportedCmdData *)cmdPtr->objClientData; cmdPtr = dataPtr->realCmdPtr; } return (Tcl_Command) cmdPtr; @@ -2019,7 +1997,7 @@ InvokeImportedNRCmd( int objc, /* Number of arguments. */ Tcl_Obj *const objv[]) /* The argument objects. */ { - ImportedCmdData *dataPtr = clientData; + ImportedCmdData *dataPtr = (ImportedCmdData *)clientData; Command *realCmdPtr = dataPtr->realCmdPtr; TclSkipTailcall(interp); @@ -2064,7 +2042,7 @@ DeleteImportedCmd( ClientData clientData) /* Points to the imported command's * ImportedCmdData structure. */ { - ImportedCmdData *dataPtr = clientData; + ImportedCmdData *dataPtr = (ImportedCmdData *)clientData; Command *realCmdPtr = dataPtr->realCmdPtr; Command *selfPtr = dataPtr->selfPtr; ImportRef *refPtr, *prevPtr; @@ -2340,7 +2318,7 @@ TclGetNamespaceForQualName( } #endif if (entryPtr != NULL) { - nsPtr = Tcl_GetHashValue(entryPtr); + nsPtr = (Namespace *)Tcl_GetHashValue(entryPtr); } else if (flags & TCL_CREATE_NS_IF_UNKNOWN) { Tcl_CallFrame *framePtr; @@ -2375,7 +2353,7 @@ TclGetNamespaceForQualName( } #endif if (entryPtr != NULL) { - altNsPtr = Tcl_GetHashValue(entryPtr); + altNsPtr = (Namespace *)Tcl_GetHashValue(entryPtr); } else { altNsPtr = NULL; } @@ -2625,7 +2603,7 @@ Tcl_FindCommand( || !(realNsPtr->flags & NS_DYING)) { entryPtr = Tcl_FindHashEntry(&realNsPtr->cmdTable, simpleName); if (entryPtr != NULL) { - cmdPtr = Tcl_GetHashValue(entryPtr); + cmdPtr = (Command *)Tcl_GetHashValue(entryPtr); } } } @@ -2646,7 +2624,7 @@ Tcl_FindCommand( && !(realNsPtr->flags & NS_DYING)) { entryPtr = Tcl_FindHashEntry(&realNsPtr->cmdTable, simpleName); if (entryPtr != NULL) { - cmdPtr = Tcl_GetHashValue(entryPtr); + cmdPtr = (Command *)Tcl_GetHashValue(entryPtr); } } } @@ -2664,7 +2642,7 @@ Tcl_FindCommand( && !(realNsPtr->flags & NS_DYING)) { entryPtr = Tcl_FindHashEntry(&realNsPtr->cmdTable, simpleName); if (entryPtr != NULL) { - cmdPtr = Tcl_GetHashValue(entryPtr); + cmdPtr = (Command *)Tcl_GetHashValue(entryPtr); } } } @@ -2686,7 +2664,7 @@ Tcl_FindCommand( entryPtr = Tcl_FindHashEntry(&nsPtr[search]->cmdTable, simpleName); if (entryPtr != NULL) { - cmdPtr = Tcl_GetHashValue(entryPtr); + cmdPtr = (Command *)Tcl_GetHashValue(entryPtr); } } } @@ -2750,7 +2728,7 @@ TclResetShadowedCmdRefs( int found, i; int trailFront = -1; int trailSize = 5; /* Formerly NUM_TRAIL_ELEMS. */ - Namespace **trailPtr = TclStackAlloc(interp, + Namespace **trailPtr = (Namespace **)TclStackAlloc(interp, trailSize * sizeof(Namespace *)); /* @@ -2770,7 +2748,7 @@ TclResetShadowedCmdRefs( * cmdName. */ - cmdName = Tcl_GetHashKey(newCmdPtr->hPtr->tablePtr, newCmdPtr->hPtr); + cmdName = (char *)Tcl_GetHashKey(newCmdPtr->hPtr->tablePtr, newCmdPtr->hPtr); for (nsPtr=newCmdPtr->nsPtr ; (nsPtr!=NULL) && (nsPtr!=globalNsPtr) ; nsPtr=nsPtr->parentPtr) { /* @@ -2799,7 +2777,7 @@ TclResetShadowedCmdRefs( } #endif if (hPtr != NULL) { - shadowNsPtr = Tcl_GetHashValue(hPtr); + shadowNsPtr = (Namespace *)Tcl_GetHashValue(hPtr); } else { found = 0; break; @@ -2840,7 +2818,7 @@ TclResetShadowedCmdRefs( if (trailFront == trailSize) { int newSize = 2 * trailSize; - trailPtr = TclStackRealloc(interp, trailPtr, + trailPtr = (Namespace **)TclStackRealloc(interp, trailPtr, newSize * sizeof(Namespace *)); trailSize = newSize; } @@ -3065,7 +3043,7 @@ NamespaceChildrenCmd( entryPtr = Tcl_FirstHashEntry(nsPtr->childTablePtr, &search); #endif while (entryPtr != NULL) { - childNsPtr = Tcl_GetHashValue(entryPtr); + childNsPtr = (Namespace *)Tcl_GetHashValue(entryPtr); if ((pattern == NULL) || Tcl_StringMatch(childNsPtr->fullName, pattern)) { elemPtr = Tcl_NewStringObj(childNsPtr->fullName, -1); @@ -3428,13 +3406,13 @@ NsEval_Callback( Tcl_Interp *interp, int result) { - Tcl_Namespace *namespacePtr = data[0]; + Tcl_Namespace *namespacePtr = (Tcl_Namespace *)data[0]; if (result == TCL_ERROR) { int length = strlen(namespacePtr->fullName); int limit = 200; int overflow = (length > limit); - char *cmd = data[1]; + char *cmd = (char *)data[1]; Tcl_AppendObjToErrorInfo(interp, Tcl_ObjPrintf( "\n (in namespace %s \"%.*s%s\" script line %d)", @@ -3550,7 +3528,7 @@ NamespaceExportCmd( Tcl_Obj *listPtr; TclNewObj(listPtr); - (void) Tcl_AppendExportList(interp, NULL, listPtr); + (void)Tcl_AppendExportList(interp, NULL, listPtr); Tcl_SetObjResult(interp, listPtr); return TCL_OK; } @@ -3716,11 +3694,11 @@ NamespaceImportCmd( TclNewObj(listPtr); for (hPtr = Tcl_FirstHashEntry(&nsPtr->cmdTable, &search); hPtr != NULL; hPtr = Tcl_NextHashEntry(&search)) { - Command *cmdPtr = Tcl_GetHashValue(hPtr); + Command *cmdPtr = (Command *)Tcl_GetHashValue(hPtr); if (cmdPtr->deleteProc == DeleteImportedCmd) { Tcl_ListObjAppendElement(NULL, listPtr, Tcl_NewStringObj( - Tcl_GetHashKey(&nsPtr->cmdTable, hPtr) ,-1)); + (char *)Tcl_GetHashKey(&nsPtr->cmdTable, hPtr) ,-1)); } } Tcl_SetObjResult(interp, listPtr); @@ -4048,7 +4026,7 @@ NamespacePathCmd( goto badNamespace; } if (nsObjc != 0) { - namespaceList = TclStackAlloc(interp, + namespaceList = (Tcl_Namespace **)TclStackAlloc(interp, sizeof(Tcl_Namespace *) * nsObjc); for (i=0 ; iinternalRep.twoPtrValue.ptr1; + ResolvedNsName *resNamePtr = (ResolvedNsName *)objPtr->internalRep.twoPtrValue.ptr1; /* * Decrement the reference count of the namespace. If there are no more @@ -4812,12 +4790,12 @@ SetNsNameFromAny( } nsPtr->refCount++; - resNamePtr = ckalloc(sizeof(ResolvedNsName)); + resNamePtr = (ResolvedNsName *)ckalloc(sizeof(ResolvedNsName)); resNamePtr->nsPtr = nsPtr; if ((name[0] == ':') && (name[1] == ':')) { resNamePtr->refNsPtr = NULL; } else { - resNamePtr->refNsPtr = (Namespace *) Tcl_GetCurrentNamespace(interp); + resNamePtr->refNsPtr = (Namespace *)Tcl_GetCurrentNamespace(interp); } resNamePtr->refCount = 1; TclFreeIntRep(objPtr); @@ -4874,7 +4852,7 @@ TclGetNamespaceChildTable( return &nPtr->childTable; #else if (nPtr->childTablePtr == NULL) { - nPtr->childTablePtr = ckalloc(sizeof(Tcl_HashTable)); + nPtr->childTablePtr = (Tcl_HashTable *)ckalloc(sizeof(Tcl_HashTable)); Tcl_InitHashTable(nPtr->childTablePtr, TCL_STRING_KEYS); } return nPtr->childTablePtr; @@ -4963,7 +4941,7 @@ TclLogCommandInfo( } else { Tcl_HashEntry *hPtr = Tcl_FindHashEntry(&iPtr->varTraces, (char *) varPtr); - VarTrace *tracePtr = Tcl_GetHashValue(hPtr); + VarTrace *tracePtr = (VarTrace *)Tcl_GetHashValue(hPtr); if (tracePtr->traceProc != EstablishErrorInfoTraces) { /* -- cgit v0.12 From ca4e244cca3e93fb8689fe1cef85954da16ff989 Mon Sep 17 00:00:00 2001 From: pooryorick Date: Wed, 25 Jan 2023 02:52:24 +0000 Subject: Test TableFromUtfProc() with strict handling of encoding errors. --- generic/tclEncoding.c | 2 +- tests/encoding.test | 8 ++++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index d10d9ca..5ba7763 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -3199,7 +3199,7 @@ TableFromUtfProc( word = fromUnicode[(ch >> 8)][ch & 0xFF]; if ((word == 0) && (ch != 0)) { - if ((STOPONERROR) && (flags & TCL_ENCODING_CHAR_LIMIT)) { + if ((STOPONERROR)) { result = TCL_CONVERT_UNKNOWN; break; } diff --git a/tests/encoding.test b/tests/encoding.test index a1d129e..d9382e4 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -513,8 +513,11 @@ test encoding-17.10 {Utf32ToUtfProc} -body { encoding convertfrom -nocomplain utf-32 "\xFF\xFF\xFF\xFF" } -result \uFFFD -test encoding-18.1 {TableToUtfProc} { -} {} + +test encoding-18.1 {TableToUtfProc error on invalid input with -strict} -body { + list [catch {encoding convertto -strict jis0208 \\} res] $res +} -result {1 {unexpected character at index 0: 'U+00005C'}} + test encoding-19.1 {TableFromUtfProc} { } {} @@ -915,6 +918,7 @@ test encoding-29.0 {get encoding nul terminator lengths} -constraints { [testencoding nullength ksc5601] } -result {1 2 4 2 2} + # cleanup namespace delete ::tcl::test::encoding ::tcltest::cleanupTests -- cgit v0.12 From d271c9f407a60528785465284451c752639b1128 Mon Sep 17 00:00:00 2001 From: pooryorick Date: Thu, 26 Jan 2023 23:55:30 +0000 Subject: Fix for [ee08ed090b0a5408], sporadic segmentation fault in coroutine.test/coroutine-7.4. --- generic/tclBasic.c | 1 + 1 file changed, 1 insertion(+) diff --git a/generic/tclBasic.c b/generic/tclBasic.c index 80dc416..bea5996 100644 --- a/generic/tclBasic.c +++ b/generic/tclBasic.c @@ -10520,6 +10520,7 @@ TclNRCoroutineObjCmd( corPtr->running.lineLABCPtr = corPtr->lineLABCPtr; corPtr->stackLevel = NULL; corPtr->auxNumLevels = 0; + corPtr->yieldPtr = NULL; /* * Create the coro's execEnv, switch to it to push the exit and coro -- cgit v0.12 From 8ea5f2cfcee413e2281a0434827d999f43743f6c Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Fri, 27 Jan 2023 08:17:34 +0000 Subject: Update fcopy documentation regarding -size argument --- doc/fcopy.n | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/doc/fcopy.n b/doc/fcopy.n index d39c803..57f9968 100644 --- a/doc/fcopy.n +++ b/doc/fcopy.n @@ -26,8 +26,9 @@ network sockets. The \fBfcopy\fR command transfers data from \fIinchan\fR until end of file or \fIsize\fR bytes or characters have been -transferred; \fIsize\fR is in bytes if the two channels are using the -same encoding, and is in characters otherwise. +transferred; \fIsize\fR is in bytes if the input channel is in binary mode, +or if the two channels are using the same encoding and -strict is not specified. +Otherwise, size is in characters. If no \fB\-size\fR argument is given, then the copy goes until end of file. All the data read from \fIinchan\fR is copied to \fIoutchan\fR. -- cgit v0.12 From 06de3a9ba1e4397a226e168a72c9da63c2a6f30a Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Fri, 27 Jan 2023 14:23:08 +0000 Subject: new testcase encoding-18.1. Testcase cleanup --- tests/encoding.test | 151 ++++++++++++++++++++++++++-------------------------- 1 file changed, 76 insertions(+), 75 deletions(-) diff --git a/tests/encoding.test b/tests/encoding.test index aaba01e..f558e01 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -3,7 +3,7 @@ # No output means no errors were found. # # Copyright (c) 1997 Sun Microsystems, Inc. -# Copyright (c) 1998-1999 by Scriptics Corporation. +# Copyright (c) 1998-1999 Scriptics Corporation. # # See the file "license.terms" for information on usage and redistribution of # this file, and for a DISCLAIMER OF ALL WARRANTIES. @@ -65,12 +65,12 @@ test encoding-1.2 {Tcl_GetEncoding: existing encoding} {testencoding} { return $x } {{fromutf }} test encoding-1.3 {Tcl_GetEncoding: load encoding} { - list [encoding convertto jis0208 \u4e4e] \ + list [encoding convertto jis0208 \u4E4E] \ [encoding convertfrom jis0208 8C] -} "8C \u4e4e" +} "8C \u4E4E" test encoding-2.1 {Tcl_FreeEncoding: refcount == 0} { - encoding convertto jis0208 \u4e4e + encoding convertto jis0208 \u4E4E } {8C} test encoding-2.2 {Tcl_FreeEncoding: refcount != 0} -setup { set system [encoding system] @@ -78,15 +78,15 @@ test encoding-2.2 {Tcl_FreeEncoding: refcount != 0} -setup { } -constraints {testencoding} -body { encoding system shiftjis ;# incr ref count encoding dirs [list [pwd]] - set x [encoding convertto shiftjis \u4e4e] ;# old one found + set x [encoding convertto shiftjis \u4E4E] ;# old one found encoding system iso8859-1 llength shiftjis ;# Shimmer away any cache of Tcl_Encoding - lappend x [catch {encoding convertto shiftjis \u4e4e} msg] $msg + lappend x [catch {encoding convertto shiftjis \u4E4E} msg] $msg } -cleanup { encoding system iso8859-1 encoding dirs $path encoding system $system -} -result "\u008c\u00c1 1 {unknown encoding \"shiftjis\"}" +} -result "\x8C\xC1 1 {unknown encoding \"shiftjis\"}" test encoding-3.1 {Tcl_GetEncodingName, NULL} -setup { set old [encoding system] @@ -138,7 +138,7 @@ test encoding-5.1 {Tcl_SetSystemEncoding} -setup { set old [encoding system] } -body { encoding system jis0208 - encoding convertto \u4e4e + encoding convertto \u4E4E } -cleanup { encoding system iso8859-1 encoding system $old @@ -170,7 +170,7 @@ test encoding-6.2 {Tcl_CreateEncoding: replace encoding} {testencoding} { test encoding-7.1 {Tcl_ExternalToUtfDString: small buffer} { encoding convertfrom jis0208 8c8c8c8c -} "\u543e\u543e\u543e\u543e" +} "\u543E\u543E\u543E\u543E" test encoding-7.2 {Tcl_UtfToExternalDString: big buffer} { set a 8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C8C append a $a @@ -179,12 +179,12 @@ test encoding-7.2 {Tcl_UtfToExternalDString: big buffer} { append a $a set x [encoding convertfrom jis0208 $a] list [string length $x] [string index $x 0] -} "512 \u4e4e" +} "512 \u4E4E" test encoding-8.1 {Tcl_ExternalToUtf} { set f [open [file join [temporaryDirectory] dummy] w] fconfigure $f -translation binary -encoding iso8859-1 - puts -nonewline $f "ab\x8c\xc1g" + puts -nonewline $f "ab\x8C\xC1g" close $f set f [open [file join [temporaryDirectory] dummy] r] fconfigure $f -translation binary -encoding shiftjis @@ -192,13 +192,13 @@ test encoding-8.1 {Tcl_ExternalToUtf} { close $f file delete [file join [temporaryDirectory] dummy] return $x -} "ab\u4e4eg" +} "ab\u4E4Eg" test encoding-9.1 {Tcl_UtfToExternalDString: small buffer} { - encoding convertto jis0208 "\u543e\u543e\u543e\u543e" + encoding convertto jis0208 "\u543E\u543E\u543E\u543E" } {8c8c8c8c} test encoding-9.2 {Tcl_UtfToExternalDString: big buffer} { - set a \u4e4e\u4e4e\u4e4e\u4e4e\u4e4e\u4e4e\u4e4e\u4e4e + set a \u4E4E\u4E4E\u4E4E\u4E4E\u4E4E\u4E4E\u4E4E\u4E4E append a $a append a $a append a $a @@ -212,7 +212,7 @@ test encoding-9.2 {Tcl_UtfToExternalDString: big buffer} { test encoding-10.1 {Tcl_UtfToExternal} { set f [open [file join [temporaryDirectory] dummy] w] fconfigure $f -translation binary -encoding shiftjis - puts -nonewline $f "ab\u4e4eg" + puts -nonewline $f "ab\u4E4Eg" close $f set f [open [file join [temporaryDirectory] dummy] r] fconfigure $f -translation binary -encoding iso8859-1 @@ -220,7 +220,7 @@ test encoding-10.1 {Tcl_UtfToExternal} { close $f file delete [file join [temporaryDirectory] dummy] return $x -} "ab\x8c\xc1g" +} "ab\x8C\xC1g" proc viewable {str} { set res "" @@ -228,7 +228,7 @@ proc viewable {str} { if {[string is print $c] && [string is ascii $c]} { append res $c } else { - append res "\\u[format %4.4x [scan $c %c]]" + append res "\\u[format %4.4X [scan $c %c]]" } } return "$str ($res)" @@ -240,26 +240,26 @@ test encoding-11.1 {LoadEncodingFile: unknown encoding} {testencoding} { encoding system iso8859-1 encoding dirs {} llength jis0208 ;# Shimmer any cached Tcl_Encoding in shared literal - set x [list [catch {encoding convertto jis0208 \u4e4e} msg] $msg] + set x [list [catch {encoding convertto jis0208 \u4E4E} msg] $msg] encoding dirs $path encoding system $system - lappend x [encoding convertto jis0208 \u4e4e] + lappend x [encoding convertto jis0208 \u4E4E] } {1 {unknown encoding "jis0208"} 8C} test encoding-11.2 {LoadEncodingFile: single-byte} { - encoding convertfrom jis0201 \xa1 -} "\uff61" + encoding convertfrom jis0201 \xA1 +} \uFF61 test encoding-11.3 {LoadEncodingFile: double-byte} { encoding convertfrom jis0208 8C -} "\u4e4e" +} \u4E4E test encoding-11.4 {LoadEncodingFile: multi-byte} { - encoding convertfrom shiftjis \x8c\xc1 -} "\u4e4e" + encoding convertfrom shiftjis \x8C\xC1 +} \u4E4E test encoding-11.5 {LoadEncodingFile: escape file} { - viewable [encoding convertto iso2022 \u4e4e] -} [viewable "\x1b\$B8C\x1b(B"] + viewable [encoding convertto iso2022 \u4E4E] +} [viewable "\x1B\$B8C\x1B(B"] test encoding-11.5.1 {LoadEncodingFile: escape file} { - viewable [encoding convertto iso2022-jp \u4e4e] -} [viewable "\x1b\$B8C\x1b(B"] + viewable [encoding convertto iso2022-jp \u4E4E] +} [viewable "\x1B\$B8C\x1B(B"] test encoding-11.6 {LoadEncodingFile: invalid file} -constraints {testencoding} -setup { set system [encoding system] set path [encoding dirs] @@ -273,7 +273,7 @@ test encoding-11.6 {LoadEncodingFile: invalid file} -constraints {testencoding} fconfigure $f -translation binary puts $f "abcdefghijklmnop" close $f - encoding convertto splat \u4e4e + encoding convertto splat \u4E4E } -returnCodes error -cleanup { file delete [file join [temporaryDirectory] tmp encoding splat.enc] removeDirectory [file join tmp encoding] @@ -289,45 +289,45 @@ test encoding-12.1 {LoadTableEncoding: normal encoding} { set x [encoding convertto iso8859-3 \u0120] append x [encoding convertto iso8859-3 \xD5] append x [encoding convertfrom iso8859-3 \xD5] -} "\xd5?\u120" +} \xD5?\u0120 test encoding-12.2 {LoadTableEncoding: single-byte encoding} { set x [encoding convertto iso8859-3 ab\u0120g] append x [encoding convertfrom iso8859-3 ab\xD5g] -} "ab\xd5gab\u120g" +} ab\xD5gab\u0120g test encoding-12.3 {LoadTableEncoding: multi-byte encoding} { set x [encoding convertto shiftjis ab\u4E4Eg] - append x [encoding convertfrom shiftjis ab\x8c\xc1g] -} "ab\x8c\xc1gab\u4e4eg" + append x [encoding convertfrom shiftjis ab\x8C\xC1g] +} ab\x8C\xC1gab\u4E4Eg test encoding-12.4 {LoadTableEncoding: double-byte encoding} { - set x [encoding convertto jis0208 \u4e4e\u3b1] + set x [encoding convertto jis0208 \u4E4E\u03B1] append x [encoding convertfrom jis0208 8C&A] -} "8C&A\u4e4e\u3b1" +} 8C&A\u4E4E\u03B1 test encoding-12.5 {LoadTableEncoding: symbol encoding} { - set x [encoding convertto symbol \u3b3] - append x [encoding convertto symbol \u67] - append x [encoding convertfrom symbol \x67] -} "\x67\x67\u3b3" + set x [encoding convertto symbol \u03B3] + append x [encoding convertto symbol g] + append x [encoding convertfrom symbol g] +} gg\u03B3 test encoding-12.6 {LoadTableEncoding: overflow in char value} ucs2 { encoding convertto iso8859-3 \U010000 -} "?" +} ? test encoding-13.1 {LoadEscapeTable} { - viewable [set x [encoding convertto iso2022 ab\u4e4e\u68d9g]] -} [viewable "ab\x1b\$B8C\x1b\$\(DD%\x1b(Bg"] + viewable [set x [encoding convertto iso2022 ab\u4E4E\u68D9g]] +} [viewable "ab\x1B\$B8C\x1B\$\(DD%\x1B(Bg"] test encoding-14.1 {BinaryProc} { encoding convertto identity \x12\x34\x56\xff\x69 } "\x12\x34\x56\xc3\xbf\x69" test encoding-15.1 {UtfToUtfProc} { - encoding convertto utf-8 \xa3 -} "\xc2\xa3" + encoding convertto utf-8 \xA3 +} "\xC2\xA3" test encoding-15.2 {UtfToUtfProc null character output} testbytestring { - binary scan [testbytestring [encoding convertto utf-8 \u0000]] H* z + binary scan [testbytestring [encoding convertto utf-8 \x00]] H* z set z } 00 test encoding-15.3 {UtfToUtfProc null character input} teststringbytes { - set y [encoding convertfrom utf-8 [encoding convertto utf-8 \u0000]] + set y [encoding convertfrom utf-8 [encoding convertto utf-8 \x00]] binary scan [teststringbytes $y] H* z set z } c080 @@ -407,18 +407,18 @@ test encoding-15.16 {UtfToUtfProc: Invalid 4-byte UTF-8, see [ed29806ba]} { list [string length $x] $y } "4 \xF0\xA0\xA1\xC2" -test encoding-16.1 {UnicodeToUtfProc} { +test encoding-16.1 {UnicodeToUtfProc} -body { set val [encoding convertfrom unicode NN] - list $val [format %x [scan $val %c]] -} "\u4e4e 4e4e" + list $val [format %X [scan $val %c]] +} -result "\u4E4E 4E4E" test encoding-16.2 {UnicodeToUtfProc} -constraints fullutf -body { set val [encoding convertfrom unicode "\xD8\xD8\xDC\xDC"] list $val [format %x [scan $val %c]] } -result "\U460DC 460dc" test encoding-16.3 {UnicodeToUtfProc} -body { set val [encoding convertfrom unicode "\xDC\xDC"] - list $val [format %x [scan $val %c]] -} -result "\uDCDC dcdc" + list $val [format %X [scan $val %c]] +} -result "\uDCDC DCDC" test encoding-17.1 {UtfToUnicodeProc} -constraints fullutf -body { encoding convertto unicode "\U460DC" @@ -430,8 +430,9 @@ test encoding-17.3 {UtfToUnicodeProc} -body { encoding convertto unicode "\uD8D8" } -result "\xD8\xD8" -test encoding-18.1 {TableToUtfProc} { -} {} +test encoding-18.1 {TableToUtfProc on invalid input} -body { + list [catch {encoding convertto jis0208 \\} res] $res +} -result {0 !)} test encoding-19.1 {TableFromUtfProc} { } {} @@ -445,11 +446,11 @@ test encoding-21.1 {EscapeToUtfProc} { test encoding-22.1 {EscapeFromUtfProc} { } {} -set iso2022encData "\u001b\$B;d\$I\$b\$G\$O!\"%A%C%W\$49XF~;~\$K\$4EPO?\$\$\$?\$@\$\$\$?\$4=;=j\$r%-%c%C%7%e%\"%&%H\$N:]\$N\u001b(B -\u001b\$B>.@Z.@Z> 8) | 0x80}] [expr {($code & 0xff) | 0x80}] + [expr {($code >> 8) | 0x80}] [expr {($code & 0xFF) | 0x80}] } proc gen-jisx0208-iso2022-jp {code} { binary format a3cca3 \ - "\x1b\$B" [expr {$code >> 8}] [expr {$code & 0xff}] "\x1b(B" + "\x1B\$B" [expr {$code >> 8}] [expr {$code & 0xFF}] "\x1B(B" } proc gen-jisx0208-cp932 {code} { set c1 [expr {($code >> 8) | 0x80}] set c2 [expr {($code & 0xff)| 0x80}] if {$c1 % 2} { - set c1 [expr {($c1 >> 1) + ($c1 < 0xdf ? 0x31 : 0x71)}] - incr c2 [expr {- (0x60 + ($c2 < 0xe0))}] + set c1 [expr {($c1 >> 1) + ($c1 < 0xDF ? 0x31 : 0x71)}] + incr c2 [expr {- (0x60 + ($c2 < 0xE0))}] } else { - set c1 [expr {($c1 >> 1) + ($c1 < 0xdf ? 0x30 : 0x70)}] + set c1 [expr {($c1 >> 1) + ($c1 < 0xDF ? 0x30 : 0x70)}] incr c2 -2 } binary format cc $c1 $c2 -- cgit v0.12 From 7f87f0f8ea86cf01682eb02e4fa8c313dc6ef4ef Mon Sep 17 00:00:00 2001 From: pooryorick Date: Fri, 27 Jan 2023 19:15:09 +0000 Subject: Add some comments and tidy code. --- generic/tclBasic.c | 3 ++- generic/tclExecute.c | 2 +- generic/tclNamesp.c | 2 ++ 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/generic/tclBasic.c b/generic/tclBasic.c index bea5996..cdaf6fe 100644 --- a/generic/tclBasic.c +++ b/generic/tclBasic.c @@ -9716,6 +9716,7 @@ TclNRYieldToObjCmd( */ iPtr->execEnvPtr = corPtr->callerEEPtr; + /* Not calling Tcl_IncrRefCount(listPtr) here because listPtr is private */ TclSetTailcall(interp, listPtr); corPtr->yieldPtr = listPtr; iPtr->execEnvPtr = corPtr->eePtr; @@ -9918,8 +9919,8 @@ TclNRCoroutineActivateCallback( if (corPtr->yieldPtr) { for (runPtr = TOP_CB(interp); runPtr; runPtr = runPtr->nextPtr) { if (runPtr->data[1] == corPtr->yieldPtr) { + Tcl_DecrRefCount(runPtr->data[1]); runPtr->data[1] = NULL; - Tcl_DecrRefCount(corPtr->yieldPtr); corPtr->yieldPtr = NULL; break; } diff --git a/generic/tclExecute.c b/generic/tclExecute.c index ec144a2..7ee5471 100644 --- a/generic/tclExecute.c +++ b/generic/tclExecute.c @@ -2503,8 +2503,8 @@ TEBCresume( * 'yieldParameter'). */ - Tcl_IncrRefCount(valuePtr); iPtr->execEnvPtr = corPtr->callerEEPtr; + Tcl_IncrRefCount(valuePtr); TclSetTailcall(interp, valuePtr); corPtr->yieldPtr = valuePtr; iPtr->execEnvPtr = corPtr->eePtr; diff --git a/generic/tclNamesp.c b/generic/tclNamesp.c index 6269bbe..5a2979e 100644 --- a/generic/tclNamesp.c +++ b/generic/tclNamesp.c @@ -417,6 +417,8 @@ Tcl_PopCallFrame( framePtr->nsPtr = NULL; if (framePtr->tailcallPtr) { + /* Reusing the existing reference count from framePtr->tailcallPtr, so + * no need to Tcl_IncrRefCount(framePtr->tailcallPtr)*/ TclSetTailcall(interp, framePtr->tailcallPtr); } } -- cgit v0.12 From 468b1a434681f98ea64d399abce7ddd8c605617d Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Sat, 28 Jan 2023 18:50:32 +0000 Subject: Fix "format %c 0x10000041", should give the same answer as in Tcl 8.6 (Handling of TCL_COMBINE flag should not be visible at script level) --- generic/tclStringObj.c | 3 +++ tests/format.test | 3 +++ 2 files changed, 6 insertions(+) diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c index a041d4c..e1376f4 100644 --- a/generic/tclStringObj.c +++ b/generic/tclStringObj.c @@ -2130,6 +2130,9 @@ Tcl_AppendFormatToObj( if (TclGetIntFromObj(interp, segment, &code) != TCL_OK) { goto error; } + if ((unsigned)code > 0x10FFFF) { + code = 0xFFFD; + } length = Tcl_UniCharToUtf(code, buf); #if TCL_UTF_MAX < 4 if ((code >= 0xD800) && (length < 3)) { diff --git a/tests/format.test b/tests/format.test index c47774a..8cabbf1 100644 --- a/tests/format.test +++ b/tests/format.test @@ -402,6 +402,9 @@ test format-8.26 {Undocumented formats} -body { test format-8.27 {Undocumented formats} -constraints pointerIs64bit -body { format "%p %#llx" [expr {2**33}] [expr {2**33}] } -result {0x200000000 0x200000000} +test format-8.28 {Internal use of TCL_COMBINE flag should not be visiable at script level} { + format %c 0x10000041 +} \uFFFD test format-9.1 {long result} { set a {1234567890abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ 1 2 3 4 5 6 7 8 9 0 a b c d e f g h i j k l m n o p q r s t u v w x y z A B C D E F G H I J K L M N O P Q R S T U V W X Y Z} -- cgit v0.12 From 5b86b255d41b6a0948597ccc8b7499efde42c4d7 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Sat, 28 Jan 2023 20:50:36 +0000 Subject: Another situation where TCL_COMBINE handling gives a strange result (utf-32 encoder) --- generic/tclEncoding.c | 10 ++++++++-- tests/encoding.test | 4 ++++ 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index e548663..46508b7 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -2505,8 +2505,14 @@ Utf32ToUtfProc( } else { ch = (src[0] & 0xFF) << 24 | (src[1] & 0xFF) << 16 | (src[2] & 0xFF) << 8 | (src[3] & 0xFF); } - if ((unsigned)ch > 0x10FFFF || (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT) - && ((ch & ~0x7FF) == 0xD800))) { + if ((unsigned)ch > 0x10FFFF) { + if (STOPONERROR) { + result = TCL_CONVERT_SYNTAX; + break; + } + ch = 0xFFFD; + } else if (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT) + && ((ch & ~0x7FF) == 0xD800)) { if (STOPONERROR) { result = TCL_CONVERT_SYNTAX; break; diff --git a/tests/encoding.test b/tests/encoding.test index a6c8a80..1971360 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -482,6 +482,10 @@ test encoding-16.7 {Utf32ToUtfProc} -body { set val [encoding convertfrom utf-32be \0\0NN] list $val [format %x [scan $val %c]] } -result "乎 4e4e" +test encoding-16.8 {Utf32ToUtfProc} -body { + set val [encoding convertfrom -nocomplain utf-32 \x41\x00\x00\x41] + list $val [format %x [scan $val %c]] +} -result "\uFFFD fffd" test encoding-17.1 {UtfToUtf16Proc} -body { encoding convertto utf-16 "\U460DC" -- cgit v0.12 From 4d235ca93a0588d63b0b2a0d1cdceb594a1a3a32 Mon Sep 17 00:00:00 2001 From: pooryorick Date: Mon, 30 Jan 2023 11:22:28 +0000 Subject: Make Tcl_UniCharToUtf() a little easier to read. --- generic/tclUtf.c | 48 ++++++++++++++++++++++++++---------------------- 1 file changed, 26 insertions(+), 22 deletions(-) diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 92bcf4f..ee0724c 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -231,8 +231,8 @@ Tcl_UniCharToUtf( } if (ch >= 0) { if (ch <= 0x7FF) { - buf[1] = (char) ((ch | 0x80) & 0xBF); - buf[0] = (char) ((ch >> 6) | 0xC0); + buf[1] = (char) (0x80 | (0x3F & ch)); + buf[0] = (char) (0xC0 | (ch >> 6)); return 2; } if (ch <= 0xFFFF) { @@ -243,10 +243,11 @@ Tcl_UniCharToUtf( ((ch & 0xF800) == 0xD800)) { if (ch & 0x0400) { /* Low surrogate */ - if (((buf[0] & 0xC0) == 0x80) && ((buf[1] & 0xCF) == 0)) { + if ( (0x80 == (0xC0 & buf[0])) + && (0 == (0xCF & buf[1]))) { /* Previous Tcl_UniChar was a high surrogate, so combine */ - buf[2] = (char) ((ch & 0x3F) | 0x80); - buf[1] |= (char) (((ch >> 6) & 0x0F) | 0x80); + buf[2] = (char) (0x80 | (0x3F & ch)); + buf[1] |= (char) (0x80 | (0x0F & (ch >> 6))); return 3; } /* Previous Tcl_UniChar was not a high surrogate, so just output */ @@ -255,38 +256,41 @@ Tcl_UniCharToUtf( ch += 0x40; /* Fill buffer with specific 3-byte (invalid) byte combination, so following low surrogate can recognize it and combine */ - buf[2] = (char) ((ch << 4) & 0x30); - buf[1] = (char) (((ch >> 2) & 0x3F) | 0x80); - buf[0] = (char) (((ch >> 8) & 0x07) | 0xF0); + buf[2] = (char) ( 0x03 & ch); + buf[1] = (char) (0x80 | (0x3F & (ch >> 2))); + buf[0] = (char) (0xF0 | (0x07 & (ch >> 8))); return 1; } } goto three; } if (ch <= 0x10FFFF) { - buf[3] = (char) ((ch | 0x80) & 0xBF); - buf[2] = (char) (((ch >> 6) | 0x80) & 0xBF); - buf[1] = (char) (((ch >> 12) | 0x80) & 0xBF); - buf[0] = (char) ((ch >> 18) | 0xF0); + buf[3] = (char) (0x80 | (0x3F & ch)); + buf[2] = (char) (0xBF & (0x80 | (ch >> 6))); + buf[1] = (char) (0xBF & (0x80 | (ch >> 12))); + buf[0] = (char) (0xF0 | (ch >> 18)); return 4; } } else if (ch == -1) { - if (((buf[0] & 0xC0) == 0x80) && ((buf[1] & 0xCF) == 0) - && ((buf[-1] & 0xF8) == 0xF0)) { - ch = 0xD7C0 + ((buf[-1] & 0x07) << 8) + ((buf[0] & 0x3F) << 2) - + ((buf[1] & 0x30) >> 4); - buf[1] = (char) ((ch | 0x80) & 0xBF); - buf[0] = (char) (((ch >> 6) | 0x80) & 0xBF); - buf[-1] = (char) ((ch >> 12) | 0xE0); + if ( (0x80 == (0xC0 & buf[0])) + && (0 == (0xCF & buf[1])) + && (0xF0 == (0xF8 & buf[-1]))) { + ch = 0xD7C0 + + ((0x07 & buf[-1]) << 8) + + ((0x3F & buf[0]) << 2) + + ((0x30 & buf[1]) >> 4); + buf[1] = (char) (0xBF & (0x80 | ch)); + buf[0] = (char) (0xBF & (0x80 | (ch >> 6))); + buf[-1] = (char) (0xE0 | (ch >> 12)); return 2; } } ch = 0xFFFD; three: - buf[2] = (char) ((ch | 0x80) & 0xBF); - buf[1] = (char) (((ch >> 6) | 0x80) & 0xBF); - buf[0] = (char) ((ch >> 12) | 0xE0); + buf[2] = (char) (0x80 | (0x3F & ch)); + buf[1] = (char) (0x80 | (0x3F & (ch >> 6))); + buf[0] = (char) (0xE0 | (ch >> 12)); return 3; } -- cgit v0.12 From 8c0f76a7de2b22a611f26c3a08a434b5b85ce261 Mon Sep 17 00:00:00 2001 From: pooryorick Date: Mon, 30 Jan 2023 11:59:41 +0000 Subject: A few more readability changes to Tcl_UniCharToUtf() jn: Please, don't do that here. Tcl_UniCharToUtf() is shared between 8.6, 8.7 and 9.0. So if you want to make it easier to read, it should be done on all 3 branches. I know you only care about "trunk", but it makes maintenance on 8.6/8.7/9.0 harder than it already is. I don't want to spend time on reviewing such kind of changes, and no-one else is doing it. Thanks for understanding (I hope)! --- generic/tclUtf.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/generic/tclUtf.c b/generic/tclUtf.c index ee0724c..ab27f1b 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -266,8 +266,8 @@ Tcl_UniCharToUtf( } if (ch <= 0x10FFFF) { buf[3] = (char) (0x80 | (0x3F & ch)); - buf[2] = (char) (0xBF & (0x80 | (ch >> 6))); - buf[1] = (char) (0xBF & (0x80 | (ch >> 12))); + buf[2] = (char) (0x80 | (0x3F & (ch >> 6))); + buf[1] = (char) (0x80 | (0x3F & (ch >> 12))); buf[0] = (char) (0xF0 | (ch >> 18)); return 4; } @@ -279,8 +279,8 @@ Tcl_UniCharToUtf( + ((0x07 & buf[-1]) << 8) + ((0x3F & buf[0]) << 2) + ((0x30 & buf[1]) >> 4); - buf[1] = (char) (0xBF & (0x80 | ch)); - buf[0] = (char) (0xBF & (0x80 | (ch >> 6))); + buf[1] = (char) (0x80 | (0x3F & ch)); + buf[0] = (char) (0x80 | (0x3F & (ch >> 6))); buf[-1] = (char) (0xE0 | (ch >> 12)); return 2; } -- cgit v0.12 From d3dae9eba4c8d979d72feddfded60eb08835543d Mon Sep 17 00:00:00 2001 From: dgp Date: Mon, 30 Jan 2023 15:37:41 +0000 Subject: silence compiler warning --- generic/tclBasic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/generic/tclBasic.c b/generic/tclBasic.c index cdaf6fe..a31bfb6 100644 --- a/generic/tclBasic.c +++ b/generic/tclBasic.c @@ -9919,7 +9919,7 @@ TclNRCoroutineActivateCallback( if (corPtr->yieldPtr) { for (runPtr = TOP_CB(interp); runPtr; runPtr = runPtr->nextPtr) { if (runPtr->data[1] == corPtr->yieldPtr) { - Tcl_DecrRefCount(runPtr->data[1]); + Tcl_DecrRefCount((Tcl_Obj *)runPtr->data[1]); runPtr->data[1] = NULL; corPtr->yieldPtr = NULL; break; -- cgit v0.12 -- cgit v0.12 From 0219b700b196373b550711d430ce8e1106869b7d Mon Sep 17 00:00:00 2001 From: pooryorick Date: Mon, 30 Jan 2023 19:37:35 +0000 Subject: Update code comments for Tcl_UniCharToUtf(). --- generic/tclUtf.c | 37 ++++++++++++++++++++----------------- 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/generic/tclUtf.c b/generic/tclUtf.c index ab27f1b..0c16f27 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -182,25 +182,22 @@ Invalid( * * Tcl_UniCharToUtf -- * - * Store the given Tcl_UniChar as a sequence of UTF-8 bytes in the - * provided buffer. Equivalent to Plan 9 runetochar(). + * Stores the given Tcl_UniChar as a sequence of UTF-8 bytes in the provided + * buffer. Equivalent to Plan 9 runetochar(). * - * Special handling of Surrogate pairs is handled as follows: - * When this function is called for ch being a high surrogate, - * the first byte of the 4-byte UTF-8 sequence is produced and + * When this function is called and ch is a high surrogate, + * the first byte of the 4-byte UTF-8 sequence is produced, and * the function returns 1. Calling the function again with a * low surrogate, the remaining 3 bytes of the 4-byte UTF-8 * sequence is produced, and the function returns 3. The buffer * is used to remember the high surrogate between the two calls. * - * If no low surrogate follows the high surrogate (which is actually - * illegal), this can be handled reasonably by calling Tcl_UniCharToUtf - * again with ch = -1. This will produce a 3-byte UTF-8 sequence - * representing the high surrogate. + * If no low surrogate follows the high surrogate (which is actually illegal), + * calling Tcl_UniCharToUtf again with ch being -1 produces a 3-byte UTF-8 + * sequence representing the high surrogate. * * Results: - * The return values is the number of bytes in the buffer that were - * consumed. + * Returns the number of bytes populated in the buffer. * * Side effects: * None. @@ -211,12 +208,13 @@ Invalid( #undef Tcl_UniCharToUtf size_t Tcl_UniCharToUtf( - int ch, /* The Tcl_UniChar to be stored in the - * buffer. Can be or'ed with flag TCL_COMBINE */ - char *buf) /* Buffer in which the UTF-8 representation of - * the Tcl_UniChar is stored. Buffer must be - * large enough to hold the UTF-8 character - * (at most 4 bytes). */ + int ch, /* The Tcl_UniChar to be stored in the + * buffer. Can be or'ed with flag TCL_COMBINE + */ + char *buf) /* Buffer in which the UTF-8 representation of + * ch is stored. Must be large enough to hold the UTF-8 + * character (at most 4 bytes). + */ { #if TCL_UTF_MAX > 3 int flags = ch; @@ -253,7 +251,12 @@ Tcl_UniCharToUtf( /* Previous Tcl_UniChar was not a high surrogate, so just output */ } else { /* High surrogate */ + + /* Add 0x10000 to the raw number encoded in the surrogate + * pair in order to get the code point. + */ ch += 0x40; + /* Fill buffer with specific 3-byte (invalid) byte combination, so following low surrogate can recognize it and combine */ buf[2] = (char) ( 0x03 & ch); -- cgit v0.12 From 64e3a23bfdcfbe7b66872c58d095aa6e1868f95e Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Tue, 31 Jan 2023 07:49:16 +0000 Subject: SetFlag -> GotFlag (since SetFlag always returns 1, this is nonsence in an if() statement). Also add test-case, showing that it was actually wrong, in behavior too. --- generic/tclIO.c | 2 +- tests/ioCmd.test | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index 47040d5..fed469c 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -8354,7 +8354,7 @@ Tcl_SetChannelOption( #ifdef TCL_NO_DEPRECATED ResetFlag(statePtr, CHANNEL_ENCODING_NOCOMPLAIN); #else - if (SetFlag(statePtr, CHANNEL_ENCODING_STRICT)) { + if (GotFlag(statePtr, CHANNEL_ENCODING_STRICT) != CHANNEL_ENCODING_STRICT) { if (interp) { Tcl_SetObjResult(interp, Tcl_NewStringObj( "bad value for -nocomplainencoding: only true allowed", diff --git a/tests/ioCmd.test b/tests/ioCmd.test index 73f0e1c..1a72f70 100644 --- a/tests/ioCmd.test +++ b/tests/ioCmd.test @@ -376,6 +376,16 @@ test iocmd-8.21 {fconfigure command / -nocomplainencoding 0 error} -constraints } -body { fconfigure $console -nocomplainencoding 0 } -returnCodes error -result "bad value for -nocomplainencoding: only true allowed" +test iocmd-8.22 {fconfigure command / -nocomplainencoding 0, no error if -strictencoding already defined} -setup { + set console stdin + set oldmode [fconfigure $console -strictencoding] +} -body { + fconfigure $console -strictencoding 1 + fconfigure $console -nocomplainencoding 0 + fconfigure $console -nocomplainencoding +} -cleanup { + fconfigure $console -strictencoding $oldmode +} -result 0 test iocmd-9.1 {eof command} { -- cgit v0.12 From 596e33bb9ec8c0083b2d6234c84afc293e525d24 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Tue, 31 Jan 2023 17:30:59 +0000 Subject: Remove incorrect comment. See: [https://www.magicsplat.com/tcl9/tcl9unicode.html#surrogates-as-literals]. Thanks, Ashok, for noticing this! --- doc/Tcl.n | 6 ------ 1 file changed, 6 deletions(-) diff --git a/doc/Tcl.n b/doc/Tcl.n index 0f46f73..8e0b342 100644 --- a/doc/Tcl.n +++ b/doc/Tcl.n @@ -221,12 +221,6 @@ twenty-one-bit hexadecimal value for the Unicode character that will be inserted, in the range U+000000\(enU+10FFFF. The parser will stop just before this range overflows, or when the maximum of eight digits is reached. The upper bits of the Unicode character will be 0. -.RS -.PP -The range U+00D800\(enU+00DFFF is reserved for surrogates, which -are illegal on their own. Therefore, such sequences will result in -the replacement character U+FFFD. Surrogate pairs should be -encoded as single \e\fBU\fIhhhhhhhh\fR character. .RE .PP Backslash substitution is not performed on words enclosed in braces, -- cgit v0.12 From 078a694834be8669ba6f79def0adbb61afacc0e2 Mon Sep 17 00:00:00 2001 From: pooryorick Date: Tue, 31 Jan 2023 22:15:02 +0000 Subject: Fix error introduced in [3e5e37f83b058f3d] for Tcl_UniCharToUtf, and add test. --- generic/tclUtf.c | 2 +- tests/encoding.test | 24 ++++++++++++++++++++++++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/generic/tclUtf.c b/generic/tclUtf.c index ab27f1b..bef32f0 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -256,7 +256,7 @@ Tcl_UniCharToUtf( ch += 0x40; /* Fill buffer with specific 3-byte (invalid) byte combination, so following low surrogate can recognize it and combine */ - buf[2] = (char) ( 0x03 & ch); + buf[2] = (char) ((ch << 4) & 0x30); buf[1] = (char) (0x80 | (0x3F & (ch >> 2))); buf[0] = (char) (0xF0 | (0x07 & (ch >> 8))); return 1; diff --git a/tests/encoding.test b/tests/encoding.test index 1971360..8351c91 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -487,6 +487,30 @@ test encoding-16.8 {Utf32ToUtfProc} -body { list $val [format %x [scan $val %c]] } -result "\uFFFD fffd" +test encoding-16.8 { + Utf16ToUtfProc, Tcl_UniCharToUtf, surrogate pairs in utf-16 +} -body { + apply [list {} { + for {set i 0xD800} {$i < 0xDBFF} {incr i} { + for {set j 0xDC00} {$j < 0xDFFF} {incr j} { + set string [binary format S2 [list $i $j]] + set status [catch { + set decoded [encoding convertfrom utf-16be $string] + set encoded [encoding convertto utf-16be $decoded] + }] + if {$status || ( $encoded ne $string )} { + return [list [format %x $i] [format %x $j]] + } + } + } + return done + } [namespace current]] +} -result done + + + + + test encoding-17.1 {UtfToUtf16Proc} -body { encoding convertto utf-16 "\U460DC" } -result "\xD8\xD8\xDC\xDC" -- cgit v0.12 From 5866ef6d2acf4db24499c820df08a8feb88ea865 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Wed, 1 Feb 2023 07:29:48 +0000 Subject: (Cherry-pick) Make Tcl_UniCharToUtf more readable. --- generic/tclEncoding.c | 6 +++--- generic/tclUtf.c | 57 +++++++++++++++++++++++++++------------------------ 2 files changed, 33 insertions(+), 30 deletions(-) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index fe2b55b..dfa7907 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -2244,9 +2244,9 @@ UtfExtToUtfIntProc( * * UtfToUtfProc -- * - * Convert from UTF-8 to UTF-8. Note that the UTF-8 to UTF-8 translation - * is not a no-op, because it will turn a stream of improperly formed - * UTF-8 into a properly formed stream. + * Converts from UTF-8 to UTF-8. Note that the UTF-8 to UTF-8 translation + * is not a no-op, because it turns a stream of improperly formed + * UTF-8 into a properly-formed stream. * * Results: * Returns TCL_OK if conversion was successful. diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 8931b39..e4d0fc8 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -206,12 +206,11 @@ Invalid( * * Tcl_UniCharToUtf -- * - * Store the given Tcl_UniChar as a sequence of UTF-8 bytes in the + * Stores the given Tcl_UniChar as a sequence of UTF-8 bytes in the * provided buffer. Equivalent to Plan 9 runetochar(). * * Results: - * The return values is the number of bytes in the buffer that were - * consumed. + * Returns the number of bytes stored into the buffer. * * Side effects: * None. @@ -234,8 +233,8 @@ Tcl_UniCharToUtf( } if (ch >= 0) { if (ch <= 0x7FF) { - buf[1] = (char) ((ch | 0x80) & 0xBF); - buf[0] = (char) ((ch >> 6) | 0xC0); + buf[1] = (char) (0x80 | (0x3F & ch)); + buf[0] = (char) (0xC0 | (ch >> 6)); return 2; } if (ch <= 0xFFFF) { @@ -243,10 +242,11 @@ Tcl_UniCharToUtf( if ((ch & 0xF800) == 0xD800) { if (ch & 0x0400) { /* Low surrogate */ - if (((buf[0] & 0xC0) == 0x80) && ((buf[1] & 0xCF) == 0)) { + if ( (0x80 == (0xC0 & buf[0])) + && (0 == (0xCF & buf[1]))) { /* Previous Tcl_UniChar was a high surrogate, so combine */ - buf[2] = (char) ((ch & 0x3F) | 0x80); - buf[1] |= (char) (((ch >> 6) & 0x0F) | 0x80); + buf[2] = (char) (0x80 | (0x3F & ch)); + buf[1] |= (char) (0x80 | (0x0F & (ch >> 6))); return 3; } /* Previous Tcl_UniChar was not a high surrogate, so just output */ @@ -256,8 +256,8 @@ Tcl_UniCharToUtf( /* Fill buffer with specific 3-byte (invalid) byte combination, so following low surrogate can recognize it and combine */ buf[2] = (char) ((ch << 4) & 0x30); - buf[1] = (char) (((ch >> 2) & 0x3F) | 0x80); - buf[0] = (char) (((ch >> 8) & 0x07) | 0xF0); + buf[1] = (char) (0x80 | (0x3F & (ch >> 2))); + buf[0] = (char) (0xF0 | (0x07 & (ch >> 8))); return 1; } } @@ -267,20 +267,23 @@ Tcl_UniCharToUtf( #if TCL_UTF_MAX > 3 if (ch <= 0x10FFFF) { - buf[3] = (char) ((ch | 0x80) & 0xBF); - buf[2] = (char) (((ch >> 6) | 0x80) & 0xBF); - buf[1] = (char) (((ch >> 12) | 0x80) & 0xBF); - buf[0] = (char) ((ch >> 18) | 0xF0); + buf[3] = (char) (0x80 | (0x3F & ch)); + buf[2] = (char) (0x80 | (0x3F & (ch >> 6))); + buf[1] = (char) (0x80 | (0x3F & (ch >> 12))); + buf[0] = (char) (0xF0 | (ch >> 18)); return 4; } } else if (ch == -1) { - if (((buf[0] & 0xC0) == 0x80) && ((buf[1] & 0xCF) == 0) - && ((buf[-1] & 0xF8) == 0xF0)) { - ch = 0xD7C0 + ((buf[-1] & 0x07) << 8) + ((buf[0] & 0x3F) << 2) - + ((buf[1] & 0x30) >> 4); - buf[1] = (char) ((ch | 0x80) & 0xBF); - buf[0] = (char) (((ch >> 6) | 0x80) & 0xBF); - buf[-1] = (char) ((ch >> 12) | 0xE0); + if ( (0x80 == (0xC0 & buf[0])) + && (0 == (0xCF & buf[1])) + && (0xF0 == (0xF8 & buf[-1]))) { + ch = 0xD7C0 + + ((0x07 & buf[-1]) << 8) + + ((0x3F & buf[0]) << 2) + + ((0x30 & buf[1]) >> 4); + buf[1] = (char) (0x80 | (0x3F & ch)); + buf[0] = (char) (0x80 | (0x3F & (ch >> 6))); + buf[-1] = (char) (0xE0 | (ch >> 12)); return 2; } #endif @@ -288,9 +291,9 @@ Tcl_UniCharToUtf( ch = 0xFFFD; three: - buf[2] = (char) ((ch | 0x80) & 0xBF); - buf[1] = (char) (((ch >> 6) | 0x80) & 0xBF); - buf[0] = (char) ((ch >> 12) | 0xE0); + buf[2] = (char) (0x80 | (0x3F & ch)); + buf[1] = (char) (0x80 | (0x3F & (ch >> 6))); + buf[0] = (char) (0xE0 | (ch >> 12)); return 3; } @@ -2386,7 +2389,7 @@ TclUniCharMatch( * * TclUtfToUCS4 -- * - * Extract the 4-byte codepoint from the leading bytes of the + * Extracts the 4-byte codepoint from the leading bytes of the * Modified UTF-8 string "src". This is a utility routine to * contain the surrogate gymnastics in one place. * @@ -2398,8 +2401,8 @@ TclUniCharMatch( * enough bytes remain in the string. * * Results: - * *usc4Ptr is filled with the UCS4 code point, and the return value is - * the number of bytes from the UTF-8 string that were consumed. + * Fills *usc4Ptr with the UCS4 code point and returns the number of bytes + * consumed from the source string. * * Side effects: * None. -- cgit v0.12 From 3eaad4bbc95c9cb3eaaf79872646d4fa7f6d8c6e Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Wed, 1 Feb 2023 08:10:12 +0000 Subject: (cherry-pick) Make Tcl_UniCharToUtf more readable and add test to exercise surrogate handling. (test-case was still missing, which cannot be used in Tcl 8.6) --- generic/tclUtf.c | 14 ++++++-------- tests/encoding.test | 24 ++++++++++++++++++++++++ 2 files changed, 30 insertions(+), 8 deletions(-) diff --git a/generic/tclUtf.c b/generic/tclUtf.c index db2be84..cb8bb3e 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -185,17 +185,15 @@ Invalid( * Stores the given Tcl_UniChar as a sequence of UTF-8 bytes in the * provided buffer. Equivalent to Plan 9 runetochar(). * - * Special handling of Surrogate pairs is handled as follows: - * When this function is called for ch being a high surrogate, - * the first byte of the 4-byte UTF-8 sequence is produced and - * the function returns 1. Calling the function again with a - * low surrogate, the remaining 3 bytes of the 4-byte UTF-8 - * sequence is produced, and the function returns 3. The buffer - * is used to remember the high surrogate between the two calls. + * Surrogate pairs are handled as follows: When ch is a high surrogate, + * the first byte of the 4-byte UTF-8 sequence is stored in the buffer and + * the function returns 1. If the function is called again with a low + * surrogate and the same buffer, the remaining 3 bytes of the 4-byte + * UTF-8 sequence are produced. * * If no low surrogate follows the high surrogate (which is actually * illegal), this can be handled reasonably by calling Tcl_UniCharToUtf - * again with ch = -1. This will produce a 3-byte UTF-8 sequence + * again with ch = -1. This produces a 3-byte UTF-8 sequence * representing the high surrogate. * * Results: diff --git a/tests/encoding.test b/tests/encoding.test index 10a37f8..ae6c78a 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -482,6 +482,30 @@ test encoding-16.7 {Utf32ToUtfProc} -body { list $val [format %x [scan $val %c]] } -result "乎 4e4e" +test encoding-16.8 { + Utf16ToUtfProc, Tcl_UniCharToUtf, surrogate pairs in utf-16 +} -body { + apply [list {} { + for {set i 0xD800} {$i < 0xDBFF} {incr i} { + for {set j 0xDC00} {$j < 0xDFFF} {incr j} { + set string [binary format S2 [list $i $j]] + set status [catch { + set decoded [encoding convertfrom utf-16be $string] + set encoded [encoding convertto utf-16be $decoded] + }] + if {$status || ( $encoded ne $string )} { + return [list [format %x $i] [format %x $j]] + } + } + } + return done + } [namespace current]] +} -result done + + + + + test encoding-17.1 {UtfToUtf16Proc} -body { encoding convertto utf-16 "\U460DC" } -result "\xD8\xD8\xDC\xDC" -- cgit v0.12 From b9c893dbc940d680560b5cc10b414c702d845004 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Wed, 1 Feb 2023 21:01:51 +0000 Subject: Renumber testscase, sync with Tcl 9.0 --- tests/encoding.test | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/tests/encoding.test b/tests/encoding.test index ae6c78a..05d9918 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -481,8 +481,12 @@ test encoding-16.7 {Utf32ToUtfProc} -body { set val [encoding convertfrom utf-32be \0\0NN] list $val [format %x [scan $val %c]] } -result "乎 4e4e" +test encoding-16.8 {Utf32ToUtfProc} -body { + set val [encoding convertfrom -nocomplain utf-32 \x41\x00\x00\x41] + list $val [format %x [scan $val %c]] +} -result "\uFFFD fffd" -test encoding-16.8 { +test encoding-16.9 { Utf16ToUtfProc, Tcl_UniCharToUtf, surrogate pairs in utf-16 } -body { apply [list {} { @@ -930,7 +934,9 @@ test encoding-27.2 {encoding dirs basic behavior} -returnCodes error -body { test encoding-28.0 {all encodings load} -body { set string hello foreach name [encoding names] { - incr count + if {$name ne "unicode"} { + incr count + } encoding convertto -nocomplain $name $string # discard the cached internal representation of Tcl_Encoding @@ -938,7 +944,7 @@ test encoding-28.0 {all encodings load} -body { llength $name } return $count -} -result [expr {[info exists ::tcl_precision] ? 92 : 91}] +} -result 91 runtests -- cgit v0.12 From de0a637d7c24faa768c266bacda17bf6ac48171d Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Thu, 2 Feb 2023 07:12:34 +0000 Subject: Fix documentation on "encoding" command --- doc/encoding.n | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/doc/encoding.n b/doc/encoding.n index 9577da3..4ad2824 100644 --- a/doc/encoding.n +++ b/doc/encoding.n @@ -39,9 +39,9 @@ system encoding is used. .VS "TCL8.7 TIP346, TIP607, TIP601" .PP .RS -The command does not fail on encoding errors. Instead, any not convertable bytes -(like incomplete UTF-8 sequences, see example below) are put as byte values into -the output stream. +The command does not fail on encoding errors (unless \fB-strict\fR is specified). +Instead, any not convertable bytes (like incomplete UTF-8 sequences, see example +below) are put as byte values into the output stream. .PP If the option \fB-failindex\fR with a variable name is given, the error reporting is changed in the following manner: @@ -51,9 +51,8 @@ converted characters until the first error position. In case of no error, the value \fI-1\fR is written to the variable. This option may not be used together with \fB-nocomplain\fR. .PP -The option \fB-nocomplain\fR has no effect and is available for compatibility with -TCL 9. In TCL 9, the encoding command fails with an error on any encoding issue. -This switch restores the TCL8.7 behaviour. +The option \fB-nocomplain\fR has no effect, but assures to get the same result +in Tcl 9. .PP The \fB-strict\fR option follows more strict rules in conversion. For the \fButf-8\fR encoder, it disallows invalid byte sequences and surrogates (which - @@ -74,9 +73,9 @@ specified, the current system encoding is used. .VS "TCL8.7 TIP346, TIP607, TIP601" .PP .RS -The command does not fail on encoding errors. Instead, the replacement character -\fB?\fR is output for any not representable character (like the dot \fB\\U2022\fR -in \fBiso-8859-1\fR encoding, see example below). +The command does not fail on encoding errors (unless \fB-strict\fR is specified). +Instead, the replacement character \fB?\fR is output for any not representable +character (like the dot \fB\\U2022\fR in \fBiso-8859-1\fR encoding, see example below). .PP If the option \fB-failindex\fR with a variable name is given, the error reporting is changed in the following manner: @@ -86,9 +85,8 @@ converted bytes until the first error position. No error condition is raised. In case of no error, the value \fI-1\fR is written to the variable. This option may not be used together with \fB-nocomplain\fR. .PP -The option \fB-nocomplain\fR has no effect and is available for compatibility with -TCL 9. In TCL 9, the encoding command fails with an error on any encoding issue. -This switch restores the TCL8.7 behaviour. +The option \fB-nocomplain\fR has no effect, but assures to get the same result +in Tcl 9. .PP The \fB-strict\fR option follows more strict rules in conversion. For the \fButf-8\fR encoder, it disallows surrogates (which - otherwise - are just passed through). This @@ -157,7 +155,7 @@ Example 4: detect the error location while transforming to ISO8859-1 (ISO-Latin 1): .PP .CS -% set s [\fBencoding convertto\fR -failindex i utf-8 "A\eu0141"] +% set s [\fBencoding convertto\fR -failindex i iso8859-1 "A\eu0141"] A % set i 1 @@ -166,11 +164,11 @@ A Example 5: replace a not representable character by the replacement character: .PP .CS -% set s [\fBencoding convertto\fR -nocomplain utf-8 "A\eu0141"] +% set s [\fBencoding convertto\fR -nocomplain iso8859-1 "A\eu0141"] A? .CE The option \fB-nocomplain\fR has no effect, but assures to get the same result -with TCL9. +in Tcl 9. .VE "TCL8.7 TIP346, TIP607, TIP601" .PP .SH "SEE ALSO" -- cgit v0.12 From 637e7224c9b4c5bde7709455dc262bdf476f9b4d Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Thu, 2 Feb 2023 11:52:31 +0000 Subject: Replace encoding -strict etc. with -profile --- generic/tclCmdAH.c | 325 +++++++++++++++++++++++++++++--------------------- generic/tclEncoding.c | 34 ++++++ generic/tclInt.h | 20 ++++ tests/encoding.test | 132 ++++++++++---------- 4 files changed, 310 insertions(+), 201 deletions(-) diff --git a/generic/tclCmdAH.c b/generic/tclCmdAH.c index 4f743cc..818159d 100644 --- a/generic/tclCmdAH.c +++ b/generic/tclCmdAH.c @@ -527,6 +527,137 @@ TclInitEncodingCmd( } /* + *------------------------------------------------------------------------ + * + * EncodingConvertParseOptions -- + * + * Common routine for parsing arguments passed to encoding convertfrom + * and encoding convertto. + * + * Results: + * TCL_OK or TCL_ERROR. + * + * Side effects: + * On success, + * - *encPtr is set to the encoding. Must be freed with Tcl_FreeEncoding + * if non-NULL + * - *dataObjPtr is set to the Tcl_Obj containing the data to encode or + * decode + * - *flagsPtr is set to encoding error handling flags + * - *failVarPtr is set to -failindex option value or NULL + * On error, all of the above are uninitialized. + * + *------------------------------------------------------------------------ + */ +static int +EncodingConvertParseOptions ( + Tcl_Interp *interp, /* For error messages. May be NULL */ + int objc, /* Number of arguments */ + Tcl_Obj *const objv[], /* Argument objects as passed to command. */ + int isEncoder, /* 1 -> convertto, 0 -> convertfrom */ + Tcl_Encoding *encPtr, /* Where to store the encoding */ + Tcl_Obj **dataObjPtr, /* Where to store ptr to Tcl_Obj containing data */ + int *flagsPtr, /* Bit mask of encoding option flags */ + Tcl_Obj **failVarPtr /* Where to store -failindex option value */ +) +{ + static const char *const options[] = {"-profile", "-failindex", NULL}; + enum convertfromOptions { PROFILE, FAILINDEX } optIndex; + enum TclEncodingProfile profile; + Tcl_Encoding encoding; + Tcl_Obj *dataObj; + Tcl_Obj *failVarObj; +#if TCL_MAJOR_VERSION > 8 || defined(TCL_NO_DEPRECATED) + int flags = TCL_ENCODING_STOPONERROR; +#else + int flags = TCL_ENCODING_NOCOMPLAIN; +#endif + + /* + * Possible combinations: + * 1) data -> objc = 2 + * 2) ?options? encoding data -> objc >= 3 + * It is intentional that specifying option forces encoding to be + * specified. Less prone to user error. This should have always been + * the case even in 8.6 imho where there were no options (ie (1) + * should never have been allowed) + */ + + if (objc == 1) { +numArgsError: /* ONLY jump here if nothing needs to be freed!!! */ + Tcl_WrongNumArgs( + interp, + 1, + objv, + "??-profile profile? ?-failindex var? ?encoding?? data"); + return TCL_ERROR; + } + + failVarObj = NULL; + if (objc == 2) { + encoding = Tcl_GetEncoding(interp, NULL); + dataObj = objv[1]; + } else { + int argIndex; + for (argIndex = 1; argIndex < (objc-2); ++argIndex) { + if (Tcl_GetIndexFromObj( + interp, objv[argIndex], options, "option", 0, &optIndex) + != TCL_OK) { + return TCL_ERROR; + } + if (++argIndex == (objc - 2)) { + goto numArgsError; + } + switch (optIndex) { + case PROFILE: + if (TclEncodingProfileParseName( + interp, objv[argIndex], &profile) + != TCL_OK) { + return TCL_ERROR; + } + switch (profile) { + case TCL_ENCODING_PROFILE_TCL8: + flags = TCL_ENCODING_NOCOMPLAIN; + break; + case TCL_ENCODING_PROFILE_STRICT: + flags = TCL_ENCODING_STRICT; + break; + case TCL_ENCODING_PROFILE_DEFAULT: /* FALLTHRU */ + default: + break; + } + break; + case FAILINDEX: + failVarObj = objv[argIndex]; + break; + } + } + /* Get encoding after opts so no need to free it on option error */ + if (Tcl_GetEncodingFromObj(interp, objv[objc - 2], &encoding) + != TCL_OK) { + return TCL_ERROR; + } + dataObj = objv[objc - 1]; + } + + /* -failindex forces checking*/ + if (failVarObj != NULL && flags == TCL_ENCODING_NOCOMPLAIN) { + /* + * Historical, but I really don't like this mixing of defines + * from two different bit mask domains - ENCODING_FAILINDEX + */ + flags = isEncoder ? TCL_ENCODING_STOPONERROR : ENCODING_FAILINDEX; + } + + *encPtr = encoding; + *dataObjPtr = dataObj; + *flagsPtr = flags; + *failVarPtr = failVarObj; + + return TCL_OK; +} + +/* *---------------------------------------------------------------------- * * EncodingConvertfromObjCmd -- @@ -559,78 +690,73 @@ EncodingConvertfromObjCmd( #endif int result; Tcl_Obj *failVarObj = NULL; + static const char *const options[] = {"-profile", "-failindex", NULL}; + enum convertfromOptions { PROFILE, FAILINDEX } optIndex; + enum TclEncodingProfile profile; + /* - * Decode parameters: * Possible combinations: * 1) data -> objc = 2 - * 2) encoding data -> objc = 3 - * 3) -nocomplain data -> objc = 3 - * 4) -nocomplain encoding data -> objc = 4 - * 5) -strict data -> objc = 3 - * 6) -strict encoding data -> objc = 4 - * 7) -failindex val data -> objc = 4 - * 8) -failindex val encoding data -> objc = 5 + * 2) ?options? encoding data -> objc >= 3 + * It is intentional that specifying option forces encoding to be + * specified. Less prone to user error. This should have always been + * the case even in 8.6 imho where there were no options (ie (1) + * should never have been allowed) */ - if (objc == 2) { + if (objc == 1) { +numArgsError: /* ONLY jump here if nothing needs to be freed!!! */ + Tcl_WrongNumArgs( + interp, + 1, + objv, + "??-profile profile? ?-failindex var? ?encoding?? data"); + return TCL_ERROR; + } + else if (objc == 2) { encoding = Tcl_GetEncoding(interp, NULL); data = objv[1]; - } else if (objc > 2 && objc < 7) { - int objcUnprocessed = objc; - data = objv[objc - 1]; - bytesPtr = Tcl_GetString(objv[1]); - if (bytesPtr[0] == '-' && bytesPtr[1] == 'n' - && !strncmp(bytesPtr, "-nocomplain", strlen(bytesPtr))) { - flags = TCL_ENCODING_NOCOMPLAIN; - objcUnprocessed--; - } else if (bytesPtr[0] == '-' && bytesPtr[1] == 's' - && !strncmp(bytesPtr, "-strict", strlen(bytesPtr))) { - flags = TCL_ENCODING_STRICT; - objcUnprocessed--; - bytesPtr = Tcl_GetString(objv[2]); - if (bytesPtr[0] == '-' && bytesPtr[1] == 'f' - && !strncmp(bytesPtr, "-failindex", strlen(bytesPtr))) { - /* at least two additional arguments needed */ - if (objc < 6) { - goto encConvFromError; - } - failVarObj = objv[3]; - objcUnprocessed -= 2; - } - } else if (bytesPtr[0] == '-' && bytesPtr[1] == 'f' - && !strncmp(bytesPtr, "-failindex", strlen(bytesPtr))) { - /* at least two additional arguments needed */ - if (objc < 4) { - goto encConvFromError; + } else { + int argIndex; + for (argIndex = 1; argIndex < (objc-2); ++argIndex) { + if (Tcl_GetIndexFromObj( + interp, objv[argIndex], options, "option", 0, &optIndex) + != TCL_OK) { + return TCL_ERROR; } - failVarObj = objv[2]; - flags = ENCODING_FAILINDEX; - objcUnprocessed -= 2; - bytesPtr = Tcl_GetString(objv[3]); - if (bytesPtr[0] == '-' && bytesPtr[1] == 's' - && !strncmp(bytesPtr, "-strict", strlen(bytesPtr))) { - flags = TCL_ENCODING_STRICT; - objcUnprocessed --; - } - } - switch (objcUnprocessed) { - case 3: - if (Tcl_GetEncodingFromObj(interp, objv[objc - 2], &encoding) != TCL_OK) { + if (++argIndex == (objc - 2)) { + goto numArgsError; + } + switch (optIndex) { + case PROFILE: + if (TclEncodingProfileParseName( + interp, objv[argIndex], &profile) + != TCL_OK) { return TCL_ERROR; } + switch (profile) { + case TCL_ENCODING_PROFILE_TCL8: + flags = TCL_ENCODING_NOCOMPLAIN; + break; + case TCL_ENCODING_PROFILE_STRICT: + flags = TCL_ENCODING_STRICT; + break; + case TCL_ENCODING_PROFILE_DEFAULT: /* FALLTHRU */ + default: + break; + } break; - case 2: - encoding = Tcl_GetEncoding(interp, NULL); + case FAILINDEX: + failVarObj = objv[argIndex]; break; - default: - goto encConvFromError; + } } - } else { - encConvFromError: - Tcl_WrongNumArgs(interp, 1, objv, "?-strict? ?-failindex var? ?encoding? data"); - ((Interp *) interp)->flags |= INTERP_ALTERNATE_WRONG_ARGS; - Tcl_WrongNumArgs(interp, 1, objv, "-nocomplain ?encoding? data"); - return TCL_ERROR; + /* Get encoding after opts so no need to free it on option error */ + if (Tcl_GetEncodingFromObj(interp, objv[objc - 2], &encoding) + != TCL_OK) { + return TCL_ERROR; + } + data = objv[objc - 1]; } /* @@ -711,83 +837,12 @@ EncodingConverttoObjCmd( int length; /* Length of the string being converted */ const char *stringPtr; /* Pointer to the first byte of the string */ int result; -#if TCL_MAJOR_VERSION > 8 || defined(TCL_NO_DEPRECATED) - int flags = TCL_ENCODING_STOPONERROR; -#else - int flags = TCL_ENCODING_NOCOMPLAIN; -#endif - Tcl_Obj *failVarObj = NULL; - - /* - * Decode parameters: - * Possible combinations: - * 1) data -> objc = 2 - * 2) encoding data -> objc = 3 - * 3) -nocomplain data -> objc = 3 - * 4) -nocomplain encoding data -> objc = 4 - * 5) -failindex val data -> objc = 4 - * 6) -failindex val encoding data -> objc = 5 - */ - - if (objc == 2) { - encoding = Tcl_GetEncoding(interp, NULL); - data = objv[1]; - } else if (objc > 2 && objc < 7) { - int objcUnprocessed = objc; - data = objv[objc - 1]; - stringPtr = Tcl_GetString(objv[1]); - if (stringPtr[0] == '-' && stringPtr[1] == 'n' - && !strncmp(stringPtr, "-nocomplain", strlen(stringPtr))) { - flags = TCL_ENCODING_NOCOMPLAIN; - objcUnprocessed--; - } else if (stringPtr[0] == '-' && stringPtr[1] == 's' - && !strncmp(stringPtr, "-strict", strlen(stringPtr))) { - flags = TCL_ENCODING_STRICT; - objcUnprocessed--; - stringPtr = Tcl_GetString(objv[2]); - if (stringPtr[0] == '-' && stringPtr[1] == 'f' - && !strncmp(stringPtr, "-failindex", strlen(stringPtr))) { - /* at least two additional arguments needed */ - if (objc < 6) { - goto encConvToError; - } - failVarObj = objv[3]; - objcUnprocessed -= 2; - } - } else if (stringPtr[0] == '-' && stringPtr[1] == 'f' - && !strncmp(stringPtr, "-failindex", strlen(stringPtr))) { - /* at least two additional arguments needed */ - if (objc < 4) { - goto encConvToError; - } - failVarObj = objv[2]; - flags = TCL_ENCODING_STOPONERROR; - objcUnprocessed -= 2; - stringPtr = Tcl_GetString(objv[3]); - if (stringPtr[0] == '-' && stringPtr[1] == 's' - && !strncmp(stringPtr, "-strict", strlen(stringPtr))) { - flags = TCL_ENCODING_STRICT; - objcUnprocessed --; - } - } - switch (objcUnprocessed) { - case 3: - if (Tcl_GetEncodingFromObj(interp, objv[objc - 2], &encoding) != TCL_OK) { - return TCL_ERROR; - } - break; - case 2: - encoding = Tcl_GetEncoding(interp, NULL); - break; - default: - goto encConvToError; - } - } else { - encConvToError: - Tcl_WrongNumArgs(interp, 1, objv, "?-strict? ?-failindex var? ?encoding? data"); - ((Interp *) interp)->flags |= INTERP_ALTERNATE_WRONG_ARGS; - Tcl_WrongNumArgs(interp, 1, objv, "-nocomplain ?encoding? data"); + int flags; + Tcl_Obj *failVarObj; + if (EncodingConvertParseOptions( + interp, objc, objv, 1, &encoding, &data, &flags, &failVarObj) + != TCL_OK) { return TCL_ERROR; } diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 288b07c..bdd091f 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -4085,6 +4085,40 @@ InitializeEncodingSearchPath( } /* + *------------------------------------------------------------------------ + * + * TclEncodingProfileParseName -- + * + * Maps an encoding profile name to its enum value. + * + * Results: + * TCL_OK on success or TCL_ERROR on failure. + * + * Side effects: + * Returns the profile enum value in *profilePtr + * + *------------------------------------------------------------------------ + */ +int +TclEncodingProfileParseName( + Tcl_Interp *interp, /* For error messages. May be NULL */ + Tcl_Obj *profileName, /* Name of profile */ + enum TclEncodingProfile *profilePtr) /* Output */ +{ + /* NOTE: Order must match enum TclEncodingProfile !!! */ + static const char *const profileNames[] = {"", "tcl8", "strict"}; + int idx; + + if (Tcl_GetIndexFromObj( + interp, profileName, profileNames, "profile", 0, &idx) + != TCL_OK) { + return TCL_ERROR; + } + *profilePtr = (enum TclEncodingProfile)idx; + return TCL_OK; +} + +/* * Local Variables: * mode: c * c-basic-offset: 4 diff --git a/generic/tclInt.h b/generic/tclInt.h index 31c7fcb..db8ee9f 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -2879,7 +2879,25 @@ MODULE_SCOPE int tclFindExecutableSearchDone; MODULE_SCOPE char *tclMemDumpFileName; MODULE_SCOPE TclPlatformType tclPlatform; +/* + * Declarations related to internal encoding functions. + */ + +/* + * Enum for encoding profiles that control encoding treatment of + * invalid bytes. NOTE: Order must match that of encodingProfileNames in + * TclEncodingProfileParseName() !!! + */ +enum TclEncodingProfile { + TCL_ENCODING_PROFILE_DEFAULT, + TCL_ENCODING_PROFILE_TCL8, + TCL_ENCODING_PROFILE_STRICT, +}; MODULE_SCOPE Tcl_Encoding tclIdentityEncoding; +MODULE_SCOPE int +TclEncodingProfileParseName(Tcl_Interp *interp, + Tcl_Obj *profileName, + enum TclEncodingProfile *profilePtr); /* * TIP #233 (Virtualized Time) @@ -4787,6 +4805,8 @@ MODULE_SCOPE Tcl_LibraryInitProc TclThread_Init; MODULE_SCOPE Tcl_LibraryInitProc Procbodytest_Init; MODULE_SCOPE Tcl_LibraryInitProc Procbodytest_SafeInit; + + /* *---------------------------------------------------------------- * Macro used by the Tcl core to check whether a pattern has any characters diff --git a/tests/encoding.test b/tests/encoding.test index ae6c78a..813cd84 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -299,7 +299,7 @@ test encoding-11.11 {encoding: extended Unicode UTF-32} { test encoding-12.1 {LoadTableEncoding: normal encoding} { set x [encoding convertto iso8859-3 Ġ] - append x [encoding convertto -nocomplain iso8859-3 Õ] + append x [encoding convertto -profile tcl8 iso8859-3 Õ] append x [encoding convertfrom iso8859-3 Õ] } "Õ?Ġ" test encoding-12.2 {LoadTableEncoding: single-byte encoding} { @@ -348,67 +348,67 @@ test encoding-15.5 {UtfToUtfProc emoji character input} { } "4 😂" test encoding-15.6 {UtfToUtfProc emoji character output} { set x \uDE02\uD83D\uDE02\uD83D - set y [encoding convertto -nocomplain utf-8 \uDE02\uD83D\uDE02\uD83D] + set y [encoding convertto -profile tcl8 utf-8 \uDE02\uD83D\uDE02\uD83D] binary scan $y H* z list [string length $y] $z } {10 edb882f09f9882eda0bd} test encoding-15.7 {UtfToUtfProc emoji character output} { set x \uDE02\uD83D\uD83D - set y [encoding convertto -nocomplain utf-8 \uDE02\uD83D\uD83D] + set y [encoding convertto -profile tcl8 utf-8 \uDE02\uD83D\uD83D] binary scan $y H* z list [string length $x] [string length $y] $z } {3 9 edb882eda0bdeda0bd} test encoding-15.8 {UtfToUtfProc emoji character output} { set x \uDE02\uD83Dé - set y [encoding convertto -nocomplain utf-8 \uDE02\uD83Dé] + set y [encoding convertto -profile tcl8 utf-8 \uDE02\uD83Dé] binary scan $y H* z list [string length $x] [string length $y] $z } {3 8 edb882eda0bdc3a9} test encoding-15.9 {UtfToUtfProc emoji character output} { set x \uDE02\uD83DX - set y [encoding convertto -nocomplain utf-8 \uDE02\uD83DX] + set y [encoding convertto -profile tcl8 utf-8 \uDE02\uD83DX] binary scan $y H* z list [string length $x] [string length $y] $z } {3 7 edb882eda0bd58} test encoding-15.10 {UtfToUtfProc high surrogate character output} { set x \uDE02é - set y [encoding convertto -nocomplain utf-8 \uDE02é] + set y [encoding convertto -profile tcl8 utf-8 \uDE02é] binary scan $y H* z list [string length $x] [string length $y] $z } {2 5 edb882c3a9} test encoding-15.11 {UtfToUtfProc low surrogate character output} { set x \uDA02é - set y [encoding convertto -nocomplain utf-8 \uDA02é] + set y [encoding convertto -profile tcl8 utf-8 \uDA02é] binary scan $y H* z list [string length $x] [string length $y] $z } {2 5 eda882c3a9} test encoding-15.12 {UtfToUtfProc high surrogate character output} { set x \uDE02Y - set y [encoding convertto -nocomplain utf-8 \uDE02Y] + set y [encoding convertto -profile tcl8 utf-8 \uDE02Y] binary scan $y H* z list [string length $x] [string length $y] $z } {2 4 edb88259} test encoding-15.13 {UtfToUtfProc low surrogate character output} { set x \uDA02Y - set y [encoding convertto -nocomplain utf-8 \uDA02Y] + set y [encoding convertto -profile tcl8 utf-8 \uDA02Y] binary scan $y H* z list [string length $x] [string length $y] $z } {2 4 eda88259} test encoding-15.14 {UtfToUtfProc high surrogate character output} { set x \uDE02 - set y [encoding convertto -nocomplain utf-8 \uDE02] + set y [encoding convertto -profile tcl8 utf-8 \uDE02] binary scan $y H* z list [string length $x] [string length $y] $z } {1 3 edb882} test encoding-15.15 {UtfToUtfProc low surrogate character output} { set x \uDA02 - set y [encoding convertto -nocomplain utf-8 \uDA02] + set y [encoding convertto -profile tcl8 utf-8 \uDA02] binary scan $y H* z list [string length $x] [string length $y] $z } {1 3 eda882} test encoding-15.16 {UtfToUtfProc: Invalid 4-byte UTF-8, see [ed29806ba]} { set x \xF0\xA0\xA1\xC2 - set y [encoding convertfrom -nocomplain utf-8 \xF0\xA0\xA1\xC2] + set y [encoding convertfrom -profile tcl8 utf-8 \xF0\xA0\xA1\xC2] list [string length $x] $y } "4 \xF0\xA0\xA1\xC2" test encoding-15.17 {UtfToUtfProc emoji character output} { @@ -513,10 +513,10 @@ test encoding-17.2 {UtfToUcs2Proc} -body { encoding convertfrom utf-16 [encoding convertto ucs-2 "\U460DC"] } -result "\uFFFD" test encoding-17.3 {UtfToUtf16Proc} -body { - encoding convertto -nocomplain utf-16be "\uDCDC" + encoding convertto -profile tcl8 utf-16be "\uDCDC" } -result "\xDC\xDC" test encoding-17.4 {UtfToUtf16Proc} -body { - encoding convertto -nocomplain utf-16le "\uD8D8" + encoding convertto -profile tcl8 utf-16le "\uD8D8" } -result "\xD8\xD8" test encoding-17.5 {UtfToUtf16Proc} -body { encoding convertto utf-32le "\U460DC" @@ -525,35 +525,35 @@ test encoding-17.6 {UtfToUtf16Proc} -body { encoding convertto utf-32be "\U460DC" } -result "\x00\x04\x60\xDC" test encoding-17.7 {UtfToUtf16Proc} -body { - encoding convertto -strict utf-16be "\uDCDC" + encoding convertto -profile strict utf-16be "\uDCDC" } -returnCodes error -result {unexpected character at index 0: 'U+00DCDC'} test encoding-17.8 {UtfToUtf16Proc} -body { - encoding convertto -strict utf-16le "\uD8D8" + encoding convertto -profile strict utf-16le "\uD8D8" } -returnCodes error -result {unexpected character at index 0: 'U+00D8D8'} test encoding-17.9 {Utf32ToUtfProc} -body { - encoding convertfrom -strict utf-32 "\xFF\xFF\xFF\xFF" + encoding convertfrom -profile strict utf-32 "\xFF\xFF\xFF\xFF" } -returnCodes error -result {unexpected byte sequence starting at index 0: '\xFF'} test encoding-17.10 {Utf32ToUtfProc} -body { - encoding convertfrom -nocomplain utf-32 "\xFF\xFF\xFF\xFF" + encoding convertfrom -profile tcl8 utf-32 "\xFF\xFF\xFF\xFF" } -result \uFFFD test encoding-18.1 {TableToUtfProc on invalid input} -constraints deprecated -body { list [catch {encoding convertto jis0208 \\} res] $res } -result {0 !)} -test encoding-18.2 {TableToUtfProc on invalid input with -strict} -body { - list [catch {encoding convertto -strict jis0208 \\} res] $res +test encoding-18.2 {TableToUtfProc on invalid input with -profile strict} -body { + list [catch {encoding convertto -profile strict jis0208 \\} res] $res } -result {1 {unexpected character at index 0: 'U+00005C'}} -test encoding-18.3 {TableToUtfProc on invalid input with -strict -failindex} -body { - list [catch {encoding convertto -strict -failindex pos jis0208 \\} res] $res $pos +test encoding-18.3 {TableToUtfProc on invalid input with -profile strict -failindex} -body { + list [catch {encoding convertto -profile strict -failindex pos jis0208 \\} res] $res $pos } -result {0 {} 0} -test encoding-18.4 {TableToUtfProc on invalid input with -failindex -strict} -body { - list [catch {encoding convertto -failindex pos -strict jis0208 \\} res] $res $pos +test encoding-18.4 {TableToUtfProc on invalid input with -failindex -profile strict} -body { + list [catch {encoding convertto -failindex pos -profile strict jis0208 \\} res] $res $pos } -result {0 {} 0} test encoding-18.5 {TableToUtfProc on invalid input with -failindex} -body { list [catch {encoding convertto -failindex pos jis0208 \\} res] $res $pos } -result {0 {} 0} -test encoding-18.6 {TableToUtfProc on invalid input with -nocomplain} -body { - list [catch {encoding convertto -nocomplain jis0208 \\} res] $res +test encoding-18.6 {TableToUtfProc on invalid input with -profile tcl8} -body { + list [catch {encoding convertto -profile tcl8 jis0208 \\} res] $res } -result {0 !)} test encoding-19.1 {TableFromUtfProc} { @@ -669,25 +669,25 @@ test encoding-24.4 {Parse valid or invalid utf-8} { string length [encoding convertfrom utf-8 "\xC0\x80"] } 1 test encoding-24.5 {Parse valid or invalid utf-8} { - string length [encoding convertfrom -nocomplain utf-8 "\xC0\x81"] + string length [encoding convertfrom -profile tcl8 utf-8 "\xC0\x81"] } 2 test encoding-24.6 {Parse valid or invalid utf-8} { - string length [encoding convertfrom -nocomplain utf-8 "\xC1\xBF"] + string length [encoding convertfrom -profile tcl8 utf-8 "\xC1\xBF"] } 2 test encoding-24.7 {Parse valid or invalid utf-8} { string length [encoding convertfrom utf-8 "\xC2\x80"] } 1 test encoding-24.8 {Parse valid or invalid utf-8} { - string length [encoding convertfrom -nocomplain utf-8 "\xE0\x80\x80"] + string length [encoding convertfrom -profile tcl8 utf-8 "\xE0\x80\x80"] } 3 test encoding-24.9 {Parse valid or invalid utf-8} { - string length [encoding convertfrom -nocomplain utf-8 "\xE0\x9F\xBF"] + string length [encoding convertfrom -profile tcl8 utf-8 "\xE0\x9F\xBF"] } 3 test encoding-24.10 {Parse valid or invalid utf-8} { string length [encoding convertfrom utf-8 "\xE0\xA0\x80"] } 1 test encoding-24.11 {Parse valid or invalid utf-8} { - string length [encoding convertfrom -nocomplain utf-8 "\xEF\xBF\xBF"] + string length [encoding convertfrom -profile tcl8 utf-8 "\xEF\xBF\xBF"] } 1 test encoding-24.12 {Parse valid or invalid utf-8} -constraints deprecated -body { encoding convertfrom utf-8 "\xC0\x81" @@ -713,68 +713,68 @@ test encoding-24.18 {Parse valid or invalid utf-8} -constraints testbytestring - test encoding-24.19 {Parse valid or invalid utf-8} -constraints deprecated -body { encoding convertto utf-8 "ZX\uD800" } -result ZX\xED\xA0\x80 -test encoding-24.20 {Parse with -nocomplain but without providing encoding} { - string length [encoding convertfrom -nocomplain "\x20"] -} 1 -test encoding-24.21 {Parse with -nocomplain but without providing encoding} { - string length [encoding convertto -nocomplain "\x20"] -} 1 +test encoding-24.20 {Parse with -profile tcl8 but without providing encoding} -body { + encoding convertfrom -profile tcl8 "\x20" +} -result {wrong # args: should be "::tcl::encoding::convertfrom ??-profile profile? ?-failindex var? ?encoding?? data"} -returnCodes error +test encoding-24.21 {Parse with -profile tcl8 but without providing encoding} -body { + string length [encoding convertto -profile tcl8 "\x20"] +} -result {::tcl::encoding::convertto ??-profile profile? ?-failindex var? ?encoding?? data} -returnCodes error test encoding-24.22 {Syntax error, two encodings} -body { encoding convertfrom iso8859-1 utf-8 "ZX\uD800" -} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertfrom ?-strict? ?-failindex var? ?encoding? data" or "::tcl::encoding::convertfrom -nocomplain ?encoding? data"} +} -returnCodes 1 -result {::tcl::encoding::convertto ??-profile profile? ?-failindex var? ?encoding?? data} test encoding-24.23 {Syntax error, two encodings} -body { encoding convertto iso8859-1 utf-8 "ZX\uD800" -} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertto ?-strict? ?-failindex var? ?encoding? data" or "::tcl::encoding::convertto -nocomplain ?encoding? data"} -test encoding-24.24 {Parse invalid utf-8 with -strict} -body { - encoding convertfrom -strict utf-8 "\xC0\x80\x00\x00" +} -returnCodes 1 -result {::tcl::encoding::convertto ??-profile profile? ?-failindex var? ?encoding?? data} +test encoding-24.24 {Parse invalid utf-8 with -profile strict} -body { + encoding convertfrom -profile strict utf-8 "\xC0\x80\x00\x00" } -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xC0'} -test encoding-24.25 {Parse invalid utf-8 with -strict} -body { - encoding convertfrom -strict utf-8 "\x40\x80\x00\x00" +test encoding-24.25 {Parse invalid utf-8 with -profile strict} -body { + encoding convertfrom -profile strict utf-8 "\x40\x80\x00\x00" } -returnCodes 1 -result {unexpected byte sequence starting at index 1: '\x80'} -test encoding-24.26 {Parse valid utf-8 with -strict} -body { - encoding convertfrom -strict utf-8 "\xF1\x80\x80\x80" +test encoding-24.26 {Parse valid utf-8 with -profile strict} -body { + encoding convertfrom -profile strict utf-8 "\xF1\x80\x80\x80" } -result \U40000 -test encoding-24.27 {Parse invalid utf-8 with -strict} -body { - encoding convertfrom -strict utf-8 "\xF0\x80\x80\x80" +test encoding-24.27 {Parse invalid utf-8 with -profile strict} -body { + encoding convertfrom -profile strict utf-8 "\xF0\x80\x80\x80" } -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xF0'} -test encoding-24.28 {Parse invalid utf-8 with -strict} -body { - encoding convertfrom -strict utf-8 "\xFF\x00\x00" +test encoding-24.28 {Parse invalid utf-8 with -profile strict} -body { + encoding convertfrom -profile strict utf-8 "\xFF\x00\x00" } -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xFF'} test encoding-24.29 {Parse invalid utf-8} -body { encoding convertfrom utf-8 \xEF\xBF\xBF } -result \uFFFF -test encoding-24.30 {Parse noncharacter with -strict} -body { - encoding convertfrom -strict utf-8 \xEF\xBF\xBF +test encoding-24.30 {Parse noncharacter with -profile strict} -body { + encoding convertfrom -profile strict utf-8 \xEF\xBF\xBF } -result \uFFFF -test encoding-24.31 {Parse invalid utf-8 with -nocomplain} -body { - encoding convertfrom -nocomplain utf-8 \xEF\xBF\xBF +test encoding-24.31 {Parse invalid utf-8 with -profile tcl8} -body { + encoding convertfrom -profile tcl8 utf-8 \xEF\xBF\xBF } -result \uFFFF test encoding-24.32 {Try to generate invalid utf-8} -body { encoding convertto utf-8 \uFFFF } -result \xEF\xBF\xBF -test encoding-24.33 {Try to generate noncharacter with -strict} -body { - encoding convertto -strict utf-8 \uFFFF +test encoding-24.33 {Try to generate noncharacter with -profile strict} -body { + encoding convertto -profile strict utf-8 \uFFFF } -result \xEF\xBF\xBF -test encoding-24.34 {Try to generate invalid utf-8 with -nocomplain} -body { - encoding convertto -nocomplain utf-8 \uFFFF +test encoding-24.34 {Try to generate invalid utf-8 with -profile tcl8} -body { + encoding convertto -profile tcl8 utf-8 \uFFFF } -result \xEF\xBF\xBF test encoding-24.35 {Parse invalid utf-8} -constraints deprecated -body { encoding convertfrom utf-8 \xED\xA0\x80 } -result \uD800 -test encoding-24.36 {Parse invalid utf-8 with -strict} -body { - encoding convertfrom -strict utf-8 \xED\xA0\x80 +test encoding-24.36 {Parse invalid utf-8 with -profile strict} -body { + encoding convertfrom -profile strict utf-8 \xED\xA0\x80 } -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xED'} -test encoding-24.37 {Parse invalid utf-8 with -nocomplain} -body { - encoding convertfrom -nocomplain utf-8 \xED\xA0\x80 +test encoding-24.37 {Parse invalid utf-8 with -profile tcl8} -body { + encoding convertfrom -profile tcl8 utf-8 \xED\xA0\x80 } -result \uD800 test encoding-24.38 {Try to generate invalid utf-8} -constraints deprecated -body { encoding convertto utf-8 \uD800 } -result \xED\xA0\x80 -test encoding-24.39 {Try to generate invalid utf-8 with -strict} -body { - encoding convertto -strict utf-8 \uD800 +test encoding-24.39 {Try to generate invalid utf-8 with -profile strict} -body { + encoding convertto -profile strict utf-8 \uD800 } -returnCodes 1 -result {unexpected character at index 0: 'U+00D800'} -test encoding-24.40 {Try to generate invalid utf-8 with -nocomplain} -body { - encoding convertto -nocomplain utf-8 \uD800 +test encoding-24.40 {Try to generate invalid utf-8 with -profile tcl8} -body { + encoding convertto -profile tcl8 utf-8 \uD800 } -result \xED\xA0\x80 file delete [file join [temporaryDirectory] iso2022.txt] @@ -931,7 +931,7 @@ test encoding-28.0 {all encodings load} -body { set string hello foreach name [encoding names] { incr count - encoding convertto -nocomplain $name $string + encoding convertto -profile tcl8 $name $string # discard the cached internal representation of Tcl_Encoding # Unfortunately, without this, encoding 2-1 fails. -- cgit v0.12 From 26e89b4b3c03b100a2a461c034c1930a23a4273b Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Thu, 2 Feb 2023 12:23:37 +0000 Subject: Use common option parsing for ConvertfromObjCmd. Fix test error messages. --- generic/tclCmdAH.c | 76 ++++------------------------------------------------- tests/encoding.test | 6 ++--- 2 files changed, 8 insertions(+), 74 deletions(-) diff --git a/generic/tclCmdAH.c b/generic/tclCmdAH.c index 818159d..67f76a6 100644 --- a/generic/tclCmdAH.c +++ b/generic/tclCmdAH.c @@ -683,81 +683,15 @@ EncodingConvertfromObjCmd( Tcl_Encoding encoding; /* Encoding to use */ int length; /* Length of the byte array being converted */ const char *bytesPtr; /* Pointer to the first byte of the array */ -#if TCL_MAJOR_VERSION > 8 || defined(TCL_NO_DEPRECATED) - int flags = TCL_ENCODING_STOPONERROR; -#else - int flags = TCL_ENCODING_NOCOMPLAIN; -#endif + int flags; int result; - Tcl_Obj *failVarObj = NULL; - static const char *const options[] = {"-profile", "-failindex", NULL}; - enum convertfromOptions { PROFILE, FAILINDEX } optIndex; - enum TclEncodingProfile profile; - - /* - * Possible combinations: - * 1) data -> objc = 2 - * 2) ?options? encoding data -> objc >= 3 - * It is intentional that specifying option forces encoding to be - * specified. Less prone to user error. This should have always been - * the case even in 8.6 imho where there were no options (ie (1) - * should never have been allowed) - */ + Tcl_Obj *failVarObj; - if (objc == 1) { -numArgsError: /* ONLY jump here if nothing needs to be freed!!! */ - Tcl_WrongNumArgs( - interp, - 1, - objv, - "??-profile profile? ?-failindex var? ?encoding?? data"); + if (EncodingConvertParseOptions( + interp, objc, objv, 1, &encoding, &data, &flags, &failVarObj) + != TCL_OK) { return TCL_ERROR; } - else if (objc == 2) { - encoding = Tcl_GetEncoding(interp, NULL); - data = objv[1]; - } else { - int argIndex; - for (argIndex = 1; argIndex < (objc-2); ++argIndex) { - if (Tcl_GetIndexFromObj( - interp, objv[argIndex], options, "option", 0, &optIndex) - != TCL_OK) { - return TCL_ERROR; - } - if (++argIndex == (objc - 2)) { - goto numArgsError; - } - switch (optIndex) { - case PROFILE: - if (TclEncodingProfileParseName( - interp, objv[argIndex], &profile) - != TCL_OK) { - return TCL_ERROR; - } - switch (profile) { - case TCL_ENCODING_PROFILE_TCL8: - flags = TCL_ENCODING_NOCOMPLAIN; - break; - case TCL_ENCODING_PROFILE_STRICT: - flags = TCL_ENCODING_STRICT; - break; - case TCL_ENCODING_PROFILE_DEFAULT: /* FALLTHRU */ - default: - break; - } - break; - case FAILINDEX: - failVarObj = objv[argIndex]; - break; - } - } - /* Get encoding after opts so no need to free it on option error */ - if (Tcl_GetEncodingFromObj(interp, objv[objc - 2], &encoding) - != TCL_OK) { - return TCL_ERROR; - } - data = objv[objc - 1]; - } /* * Convert the string into a byte array in 'ds' diff --git a/tests/encoding.test b/tests/encoding.test index 813cd84..e4a2acb 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -718,13 +718,13 @@ test encoding-24.20 {Parse with -profile tcl8 but without providing encoding} -b } -result {wrong # args: should be "::tcl::encoding::convertfrom ??-profile profile? ?-failindex var? ?encoding?? data"} -returnCodes error test encoding-24.21 {Parse with -profile tcl8 but without providing encoding} -body { string length [encoding convertto -profile tcl8 "\x20"] -} -result {::tcl::encoding::convertto ??-profile profile? ?-failindex var? ?encoding?? data} -returnCodes error +} -result {wrong # args: should be "::tcl::encoding::convertto ??-profile profile? ?-failindex var? ?encoding?? data"} -returnCodes error test encoding-24.22 {Syntax error, two encodings} -body { encoding convertfrom iso8859-1 utf-8 "ZX\uD800" -} -returnCodes 1 -result {::tcl::encoding::convertto ??-profile profile? ?-failindex var? ?encoding?? data} +} -result {bad option "iso8859-1": must be -profile or -failindex} -returnCodes error test encoding-24.23 {Syntax error, two encodings} -body { encoding convertto iso8859-1 utf-8 "ZX\uD800" -} -returnCodes 1 -result {::tcl::encoding::convertto ??-profile profile? ?-failindex var? ?encoding?? data} +} -result {bad option "iso8859-1": must be -profile or -failindex} -returnCodes error test encoding-24.24 {Parse invalid utf-8 with -profile strict} -body { encoding convertfrom -profile strict utf-8 "\xC0\x80\x00\x00" } -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xC0'} -- cgit v0.12 From e31133e3b0149b9bc29c9c6f06e76ccc6994df7e Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Thu, 2 Feb 2023 15:37:21 +0000 Subject: Change encoding error options to fconfigure to encoding profiles --- generic/tclCmdAH.c | 2 +- generic/tclEncoding.c | 23 +++++++++++------ generic/tclIO.c | 69 ++++++++++++++++----------------------------------- generic/tclInt.h | 2 +- 4 files changed, 39 insertions(+), 57 deletions(-) diff --git a/generic/tclCmdAH.c b/generic/tclCmdAH.c index 67f76a6..9165fda 100644 --- a/generic/tclCmdAH.c +++ b/generic/tclCmdAH.c @@ -611,7 +611,7 @@ numArgsError: /* ONLY jump here if nothing needs to be freed!!! */ switch (optIndex) { case PROFILE: if (TclEncodingProfileParseName( - interp, objv[argIndex], &profile) + interp, Tcl_GetString(objv[argIndex]), &profile) != TCL_OK) { return TCL_ERROR; } diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index bdd091f..55ace3c 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -4102,20 +4102,29 @@ InitializeEncodingSearchPath( int TclEncodingProfileParseName( Tcl_Interp *interp, /* For error messages. May be NULL */ - Tcl_Obj *profileName, /* Name of profile */ + const char *profileName, /* Name of profile */ enum TclEncodingProfile *profilePtr) /* Output */ { /* NOTE: Order must match enum TclEncodingProfile !!! */ static const char *const profileNames[] = {"", "tcl8", "strict"}; int idx; - if (Tcl_GetIndexFromObj( - interp, profileName, profileNames, "profile", 0, &idx) - != TCL_OK) { - return TCL_ERROR; + for (idx = 0; idx < sizeof(profileNames) / sizeof(profileNames[0]); ++idx) { + if (!strcmp(profileName, profileNames[idx])) { + *profilePtr = (enum TclEncodingProfile)idx; + return TCL_OK; + } } - *profilePtr = (enum TclEncodingProfile)idx; - return TCL_OK; + if (interp) { + Tcl_SetObjResult( + interp, + Tcl_ObjPrintf( + "bad profile \"%s\". Must be \"\", \"tcl8\" or \"strict\".", + profileName)); + Tcl_SetErrorCode( + interp, "TCL", "ENCODING", "PROFILE", profileName, NULL); + } + return TCL_ERROR; } /* diff --git a/generic/tclIO.c b/generic/tclIO.c index fed469c..47740ef 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -7862,7 +7862,7 @@ Tcl_BadChannelOption( { if (interp != NULL) { const char *genericopt = - "blocking buffering buffersize encoding eofchar nocomplainencoding strictencoding translation"; + "blocking buffering buffersize encoding encodingprofile eofchar translation"; const char **argv; int argc, i; Tcl_DString ds; @@ -8060,27 +8060,17 @@ Tcl_GetChannelOption( return TCL_OK; } } - if (len == 0 || HaveOpt(1, "-nocomplainencoding")) { + if (len == 0 || HaveOpt(1, "-encodingprofile")) { if (len == 0) { - Tcl_DStringAppendElement(dsPtr, "-nocomplainencoding"); + Tcl_DStringAppendElement(dsPtr, "-encodingprofile"); } -#ifdef TCL_NO_DEPRECATED - Tcl_DStringAppendElement(dsPtr, - (flags & CHANNEL_ENCODING_NOCOMPLAIN) ? "1" : "0"); -#else - Tcl_DStringAppendElement(dsPtr, - (flags & CHANNEL_ENCODING_STRICT) ? "0" : "1"); -#endif - if (len > 0) { - return TCL_OK; - } - } - if (len == 0 || HaveOpt(1, "-strictencoding")) { - if (len == 0) { - Tcl_DStringAppendElement(dsPtr, "-strictencoding"); + if (flags & CHANNEL_ENCODING_STRICT) { + Tcl_DStringAppendElement(dsPtr, "strict"); + } else if (flags & CHANNEL_ENCODING_NOCOMPLAIN) { + Tcl_DStringAppendElement(dsPtr, "tcl8"); + } else { + Tcl_DStringAppendElement(dsPtr, ""); } - Tcl_DStringAppendElement(dsPtr, - (flags & CHANNEL_ENCODING_STRICT) ? "1" : "0"); if (len > 0) { return TCL_OK; } @@ -8341,42 +8331,25 @@ Tcl_SetChannelOption( ResetFlag(statePtr, CHANNEL_EOF|CHANNEL_STICKY_EOF|CHANNEL_BLOCKED); statePtr->inputEncodingFlags &= ~TCL_ENCODING_END; return TCL_OK; - } else if (HaveOpt(1, "-nocomplainencoding")) { - int newMode; - - if (Tcl_GetBoolean(interp, newValue, &newMode) == TCL_ERROR) { + } else if (HaveOpt(1, "-encodingprofile")) { + enum TclEncodingProfile profile; + if (TclEncodingProfileParseName(interp, newValue, &profile) != TCL_OK) { return TCL_ERROR; } - if (newMode) { + switch (profile) { + case TCL_ENCODING_PROFILE_TCL8: ResetFlag(statePtr, CHANNEL_ENCODING_STRICT); SetFlag(statePtr, CHANNEL_ENCODING_NOCOMPLAIN); - } else { -#ifdef TCL_NO_DEPRECATED - ResetFlag(statePtr, CHANNEL_ENCODING_NOCOMPLAIN); -#else - if (GotFlag(statePtr, CHANNEL_ENCODING_STRICT) != CHANNEL_ENCODING_STRICT) { - if (interp) { - Tcl_SetObjResult(interp, Tcl_NewStringObj( - "bad value for -nocomplainencoding: only true allowed", - TCL_INDEX_NONE)); - } - return TCL_ERROR; - } -#endif - } - ResetFlag(statePtr, CHANNEL_NEED_MORE_DATA|CHANNEL_ENCODING_ERROR); - return TCL_OK; - } else if (HaveOpt(1, "-strictencoding")) { - int newMode; - - if (Tcl_GetBoolean(interp, newValue, &newMode) == TCL_ERROR) { - return TCL_ERROR; - } - if (newMode) { + break; + case TCL_ENCODING_PROFILE_STRICT: ResetFlag(statePtr, CHANNEL_ENCODING_NOCOMPLAIN); SetFlag(statePtr, CHANNEL_ENCODING_STRICT); - } else { + break; + case TCL_ENCODING_PROFILE_DEFAULT: /* FALLTHRU */ + default: + ResetFlag(statePtr, CHANNEL_ENCODING_NOCOMPLAIN); ResetFlag(statePtr, CHANNEL_ENCODING_STRICT); + break; } ResetFlag(statePtr, CHANNEL_NEED_MORE_DATA|CHANNEL_ENCODING_ERROR); return TCL_OK; diff --git a/generic/tclInt.h b/generic/tclInt.h index db8ee9f..82728d3 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -2896,7 +2896,7 @@ enum TclEncodingProfile { MODULE_SCOPE Tcl_Encoding tclIdentityEncoding; MODULE_SCOPE int TclEncodingProfileParseName(Tcl_Interp *interp, - Tcl_Obj *profileName, + const char *profileName, enum TclEncodingProfile *profilePtr); /* -- cgit v0.12 From 100d8ce724b2ed4d9f15a045bc2e48119b53465f Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Thu, 2 Feb 2023 16:43:12 +0000 Subject: Update tests to use -encodingprofile --- generic/tclIO.c | 30 +++++++++++++++--------------- tests/chanio.test | 6 +++--- tests/io.test | 44 ++++++++++++++++++++++---------------------- tests/ioCmd.test | 26 ++++++++++++++------------ tests/winConsole.test | 14 +++++++------- tests/zlib.test | 4 ++-- 6 files changed, 63 insertions(+), 61 deletions(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index 47740ef..b76234b 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -8017,6 +8017,21 @@ Tcl_GetChannelOption( return TCL_OK; } } + if (len == 0 || HaveOpt(1, "-encodingprofile")) { + if (len == 0) { + Tcl_DStringAppendElement(dsPtr, "-encodingprofile"); + } + if (flags & CHANNEL_ENCODING_STRICT) { + Tcl_DStringAppendElement(dsPtr, "strict"); + } else if (flags & CHANNEL_ENCODING_NOCOMPLAIN) { + Tcl_DStringAppendElement(dsPtr, "tcl8"); + } else { + Tcl_DStringAppendElement(dsPtr, ""); + } + if (len > 0) { + return TCL_OK; + } + } if (len == 0 || HaveOpt(2, "-eofchar")) { if (len == 0) { Tcl_DStringAppendElement(dsPtr, "-eofchar"); @@ -8060,21 +8075,6 @@ Tcl_GetChannelOption( return TCL_OK; } } - if (len == 0 || HaveOpt(1, "-encodingprofile")) { - if (len == 0) { - Tcl_DStringAppendElement(dsPtr, "-encodingprofile"); - } - if (flags & CHANNEL_ENCODING_STRICT) { - Tcl_DStringAppendElement(dsPtr, "strict"); - } else if (flags & CHANNEL_ENCODING_NOCOMPLAIN) { - Tcl_DStringAppendElement(dsPtr, "tcl8"); - } else { - Tcl_DStringAppendElement(dsPtr, ""); - } - if (len > 0) { - return TCL_OK; - } - } if (len == 0 || HaveOpt(1, "-translation")) { if (len == 0) { Tcl_DStringAppendElement(dsPtr, "-translation"); diff --git a/tests/chanio.test b/tests/chanio.test index fb94051..7c9857d 100644 --- a/tests/chanio.test +++ b/tests/chanio.test @@ -252,7 +252,7 @@ test chan-io-3.3 {WriteChars: compatibility with WriteBytes: flush on line} -bod test chan-io-3.4 {WriteChars: loop over stage buffer} -body { # stage buffer maps to more than can be queued at once. set f [open $path(test1) w] - chan configure $f -encoding jis0208 -buffersize 16 -nocomplainencoding 1 + chan configure $f -encoding jis0208 -buffersize 16 -profile tcl8 chan puts -nonewline $f "\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\" set x [list [contents $path(test1)]] chan close $f @@ -265,7 +265,7 @@ test chan-io-3.5 {WriteChars: saved != 0} -body { # be moved to beginning of next channel buffer to preserve requested # buffersize. set f [open $path(test1) w] - chan configure $f -encoding jis0208 -buffersize 17 -nocomplainencoding 1 + chan configure $f -encoding jis0208 -buffersize 17 -profile tcl8 chan puts -nonewline $f "\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\" set x [list [contents $path(test1)]] chan close $f @@ -298,7 +298,7 @@ test chan-io-3.7 {WriteChars: (bufPtr->nextAdded > bufPtr->length)} -body { # on flush. The truncated bytes are moved to the beginning of the next # channel buffer. set f [open $path(test1) w] - chan configure $f -encoding jis0208 -buffersize 17 -nocomplainencoding 1 + chan configure $f -encoding jis0208 -buffersize 17 -profile tcl8 chan puts -nonewline $f "\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\" set x [list [contents $path(test1)]] chan close $f diff --git a/tests/io.test b/tests/io.test index 2708906..efc6374 100644 --- a/tests/io.test +++ b/tests/io.test @@ -272,7 +272,7 @@ test io-3.4 {WriteChars: loop over stage buffer} -body { # stage buffer maps to more than can be queued at once. set f [open $path(test1) w] - fconfigure $f -encoding jis0208 -buffersize 16 -nocomplainencoding 1 + fconfigure $f -encoding jis0208 -buffersize 16 -encodingprofile tcl8 puts -nonewline $f "\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\" set x [list [contents $path(test1)]] close $f @@ -286,7 +286,7 @@ test io-3.5 {WriteChars: saved != 0} -body { # requested buffersize. set f [open $path(test1) w] - fconfigure $f -encoding jis0208 -buffersize 17 -nocomplainencoding 1 + fconfigure $f -encoding jis0208 -buffersize 17 -encodingprofile tcl8 puts -nonewline $f "\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\" set x [list [contents $path(test1)]] close $f @@ -319,7 +319,7 @@ test io-3.7 {WriteChars: (bufPtr->nextAdded > bufPtr->length)} -body { # of the next channel buffer. set f [open $path(test1) w] - fconfigure $f -encoding jis0208 -buffersize 17 -nocomplainencoding 1 + fconfigure $f -encoding jis0208 -buffersize 17 -encodingprofile tcl8 puts -nonewline $f "\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\" set x [list [contents $path(test1)]] close $f @@ -8964,7 +8964,7 @@ test io-75.1 {multibyte encoding error read results in raw bytes} -setup { puts -nonewline $f A\xC0\x40 flush $f seek $f 0 - fconfigure $f -encoding utf-8 -nocomplainencoding 1 -buffering none + fconfigure $f -encoding utf-8 -encodingprofile tcl8 -buffering none } -body { set d [read $f] binary scan $d H* hd @@ -8974,10 +8974,10 @@ test io-75.1 {multibyte encoding error read results in raw bytes} -setup { removeFile io-75.1 } -result 41c040 -test io-75.2 {unrepresentable character write passes and is replaced by ? (-nocomplainencoding 1)} -setup { +test io-75.2 {unrepresentable character write passes and is replaced by ? (-encodingprofile tcl8)} -setup { set fn [makeFile {} io-75.2] set f [open $fn w+] - fconfigure $f -encoding iso8859-1 -nocomplainencoding 1 + fconfigure $f -encoding iso8859-1 -encodingprofile tcl8 } -body { puts -nonewline $f A\u2022 flush $f @@ -8991,14 +8991,14 @@ test io-75.2 {unrepresentable character write passes and is replaced by ? (-noco # Incomplete sequence test. # This error may IMHO only be detected with the close. # But the read already returns the incomplete sequence. -test io-75.3 {incomplete multibyte encoding read is ignored (-nocomplainencoding 1)} -setup { +test io-75.3 {incomplete multibyte encoding read is ignored (-encodingprofile tcl8)} -setup { set fn [makeFile {} io-75.3] set f [open $fn w+] fconfigure $f -encoding binary puts -nonewline $f "A\xC0" flush $f seek $f 0 - fconfigure $f -encoding utf-8 -buffering none -nocomplainencoding 1 + fconfigure $f -encoding utf-8 -buffering none -encodingprofile tcl8 } -body { set d [read $f] close $f @@ -9010,7 +9010,7 @@ test io-75.3 {incomplete multibyte encoding read is ignored (-nocomplainencoding # As utf-8 has a special treatment in multi-byte decoding, also test another # one. -test io-75.4 {shiftjis encoding error read results in raw bytes (-nocomplainencoding 1)} -setup { +test io-75.4 {shiftjis encoding error read results in raw bytes (-encodingprofile tcl8)} -setup { set fn [makeFile {} io-75.4] set f [open $fn w+] fconfigure $f -encoding binary @@ -9019,7 +9019,7 @@ test io-75.4 {shiftjis encoding error read results in raw bytes (-nocomplainenco puts -nonewline $f A\x81\xFFA flush $f seek $f 0 - fconfigure $f -encoding shiftjis -buffering none -eofchar "" -translation lf -nocomplainencoding 1 + fconfigure $f -encoding shiftjis -buffering none -eofchar "" -translation lf -encodingprofile tcl8 } -body { set d [read $f] binary scan $d H* hd @@ -9029,14 +9029,14 @@ test io-75.4 {shiftjis encoding error read results in raw bytes (-nocomplainenco removeFile io-75.4 } -result 4181ff41 -test io-75.5 {invalid utf-8 encoding read is ignored (-nocomplainencoding 1)} -setup { +test io-75.5 {invalid utf-8 encoding read is ignored (-encodingprofile tcl8)} -setup { set fn [makeFile {} io-75.5] set f [open $fn w+] fconfigure $f -encoding binary puts -nonewline $f A\x81 flush $f seek $f 0 - fconfigure $f -encoding utf-8 -buffering none -eofchar "" -translation lf -nocomplainencoding 1 + fconfigure $f -encoding utf-8 -buffering none -eofchar "" -translation lf -encodingprofile tcl8 } -body { set d [read $f] close $f @@ -9046,7 +9046,7 @@ test io-75.5 {invalid utf-8 encoding read is ignored (-nocomplainencoding 1)} -s removeFile io-75.5 } -result 4181 -test io-75.6 {invalid utf-8 encoding read is not ignored (-strictencoding 1)} -setup { +test io-75.6 {invalid utf-8 encoding read is not ignored (-encodingprofile strict)} -setup { set fn [makeFile {} io-75.6] set f [open $fn w+] fconfigure $f -encoding binary @@ -9054,7 +9054,7 @@ test io-75.6 {invalid utf-8 encoding read is not ignored (-strictencoding 1)} -s puts -nonewline $f A\x81 flush $f seek $f 0 - fconfigure $f -encoding utf-8 -buffering none -eofchar "" -translation lf -strictencoding 1 + fconfigure $f -encoding utf-8 -buffering none -eofchar "" -translation lf -encodingprofile strict } -body { set d [read $f] binary scan $d H* hd @@ -9065,7 +9065,7 @@ test io-75.6 {invalid utf-8 encoding read is not ignored (-strictencoding 1)} -s removeFile io-75.6 } -match glob -result {41 1 {error reading "*": illegal byte sequence}} -test io-75.7 {invalid utf-8 encoding eof handling (-strictencoding 1)} -setup { +test io-75.7 {invalid utf-8 encoding eof handling (-encodingprofile strict)} -setup { set fn [makeFile {} io-75.7] set f [open $fn w+] fconfigure $f -encoding binary @@ -9073,7 +9073,7 @@ test io-75.7 {invalid utf-8 encoding eof handling (-strictencoding 1)} -setup { puts -nonewline $f A\xA1\x1A flush $f seek $f 0 - fconfigure $f -encoding utf-8 -buffering none -eofchar \x1A -translation lf -strictencoding 1 + fconfigure $f -encoding utf-8 -buffering none -eofchar \x1A -translation lf -encodingprofile strict } -body { set d [read $f] binary scan $d H* hd @@ -9088,7 +9088,7 @@ test io-75.7 {invalid utf-8 encoding eof handling (-strictencoding 1)} -setup { removeFile io-75.7 } -match glob -result {41 0 1 {error reading "*": illegal byte sequence} ¡} -test io-75.8 {invalid utf-8 encoding eof handling (-strictencoding 1)} -setup { +test io-75.8 {invalid utf-8 encoding eof handling (-encodingprofile strict)} -setup { set fn [makeFile {} io-75.8] set f [open $fn w+] fconfigure $f -encoding binary @@ -9096,7 +9096,7 @@ test io-75.8 {invalid utf-8 encoding eof handling (-strictencoding 1)} -setup { puts -nonewline $f A\x1A\x81 flush $f seek $f 0 - fconfigure $f -encoding utf-8 -buffering none -eofchar \x1A -translation lf -strictencoding 1 + fconfigure $f -encoding utf-8 -buffering none -eofchar \x1A -translation lf -encodingprofile strict } -body { set d [read $f] binary scan $d H* hd @@ -9111,7 +9111,7 @@ test io-75.8 {invalid utf-8 encoding eof handling (-strictencoding 1)} -setup { test io-75.9 {unrepresentable character write passes and is replaced by ?} -setup { set fn [makeFile {} io-75.9] set f [open $fn w+] - fconfigure $f -encoding iso8859-1 -strictencoding 1 + fconfigure $f -encoding iso8859-1 -encodingprofile strict } -body { catch {puts -nonewline $f "A\u2022"} msg flush $f @@ -9155,7 +9155,7 @@ test io-75.11 {shiftjis encoding error read results in raw bytes} -setup { puts -nonewline $f A\x81\xFFA flush $f seek $f 0 - fconfigure $f -encoding shiftjis -buffering none -eofchar "" -translation lf -strictencoding 1 + fconfigure $f -encoding shiftjis -buffering none -eofchar "" -translation lf -encodingprofile strict } -body { set d [read $f] binary scan $d H* hd @@ -9182,7 +9182,7 @@ test io-75.12 {invalid utf-8 encoding read is ignored} -setup { } -cleanup { removeFile io-75.12 } -result 4181 -test io-75.13 {invalid utf-8 encoding read is not ignored (-strictencoding 1)} -setup { +test io-75.13 {invalid utf-8 encoding read is not ignored (-encodingprofile strict)} -setup { set fn [makeFile {} io-75.13] set f [open $fn w+] fconfigure $f -encoding binary @@ -9190,7 +9190,7 @@ test io-75.13 {invalid utf-8 encoding read is not ignored (-strictencoding 1)} - puts -nonewline $f "A\x81" flush $f seek $f 0 - fconfigure $f -encoding utf-8 -buffering none -eofchar "" -translation lf -strictencoding 1 + fconfigure $f -encoding utf-8 -buffering none -eofchar "" -translation lf -encodingprofile strict } -body { set d [read $f] binary scan $d H* hd diff --git a/tests/ioCmd.test b/tests/ioCmd.test index 1a72f70..8c9d870 100644 --- a/tests/ioCmd.test +++ b/tests/ioCmd.test @@ -207,7 +207,7 @@ test iocmd-7.5 {close command} -setup { proc expectedOpts {got extra} { set basicOpts { - -blocking -buffering -buffersize -encoding -eofchar -nocomplainencoding -strictencoding -translation + -blocking -buffering -buffersize -encoding -encodingprofile -eofchar -translation } set opts [list {*}$basicOpts {*}$extra] lset opts end [string cat "or " [lindex $opts end]] @@ -240,33 +240,33 @@ test iocmd-8.7 {fconfigure command} -setup { file delete $path(test1) } -body { set f1 [open $path(test1) w] - fconfigure $f1 -translation lf -eofchar {} -encoding utf-16 -nocomplainencoding 1 + fconfigure $f1 -translation lf -eofchar {} -encoding utf-16 -encodingprofile tcl8 fconfigure $f1 } -cleanup { catch {close $f1} -} -result {-blocking 1 -buffering full -buffersize 4096 -encoding utf-16 -eofchar {} -nocomplainencoding 1 -strictencoding 0 -translation lf} +} -result {-blocking 1 -buffering full -buffersize 4096 -encoding utf-16 -encodingprofile tcl8 -eofchar {} -translation lf} test iocmd-8.8 {fconfigure command} -setup { file delete $path(test1) set x {} } -body { set f1 [open $path(test1) w] fconfigure $f1 -translation lf -buffering line -buffersize 3030 \ - -eofchar {} -encoding utf-16 -nocomplainencoding 1 + -eofchar {} -encoding utf-16 -encodingprofile tcl8 lappend x [fconfigure $f1 -buffering] lappend x [fconfigure $f1] } -cleanup { catch {close $f1} -} -result {line {-blocking 1 -buffering line -buffersize 3030 -encoding utf-16 -eofchar {} -nocomplainencoding 1 -strictencoding 0 -translation lf}} +} -result {line {-blocking 1 -buffering line -buffersize 3030 -encoding utf-16 -encodingprofile tcl8 -eofchar {} -translation lf}} test iocmd-8.9 {fconfigure command} -setup { file delete $path(test1) } -body { set f1 [open $path(test1) w] fconfigure $f1 -translation binary -buffering none -buffersize 4040 \ - -eofchar {} -encoding binary -nocomplainencoding 1 + -eofchar {} -encoding binary -encodingprofile tcl8 fconfigure $f1 } -cleanup { catch {close $f1} -} -result {-blocking 1 -buffering none -buffersize 4040 -encoding binary -eofchar {} -nocomplainencoding 1 -strictencoding 0 -translation lf} +} -result {-blocking 1 -buffering none -buffersize 4040 -encoding binary -encodingprofile tcl8 -eofchar {} -translation lf} test iocmd-8.10 {fconfigure command} -returnCodes error -body { fconfigure a b } -result {can not find channel named "a"} @@ -369,7 +369,7 @@ test iocmd-8.20 {fconfigure command / win console channel} -constraints {nonPort # TODO: Test parsing of serial channel options (nonPortable, since requires an # open channel to work with). test iocmd-8.21 {fconfigure command / -nocomplainencoding 0 error} -constraints { - deprecated + deprecated obsolete } -setup { # I don't know how else to open the console, but this is non-portable set console stdin @@ -378,7 +378,9 @@ test iocmd-8.21 {fconfigure command / -nocomplainencoding 0 error} -constraints } -returnCodes error -result "bad value for -nocomplainencoding: only true allowed" test iocmd-8.22 {fconfigure command / -nocomplainencoding 0, no error if -strictencoding already defined} -setup { set console stdin - set oldmode [fconfigure $console -strictencoding] + set oldprofile [fconfigure $console -encodingprofile] +} -constraints { + obsolete } -body { fconfigure $console -strictencoding 1 fconfigure $console -nocomplainencoding 0 @@ -1381,7 +1383,7 @@ test iocmd-25.1 {chan configure, cgetall, standard options} -match glob -body { close $c rename foo {} set res -} -result {{-blocking 1 -buffering full -buffersize 4096 -encoding * -eofchar {{} {}} -nocomplainencoding * -strictencoding 0 -translation {auto *}}} +} -result {{-blocking 1 -buffering full -buffersize 4096 -encoding * -encodingprofile * -eofchar {{} {}} -translation {auto *}}} test iocmd-25.2 {chan configure, cgetall, no options} -match glob -body { set res {} proc foo {args} {oninit cget cgetall; onfinal; track; return ""} @@ -1390,7 +1392,7 @@ test iocmd-25.2 {chan configure, cgetall, no options} -match glob -body { close $c rename foo {} set res -} -result {{cgetall rc*} {-blocking 1 -buffering full -buffersize 4096 -encoding * -eofchar {{} {}} -nocomplainencoding * -strictencoding 0 -translation {auto *}}} +} -result {{cgetall rc*} {-blocking 1 -buffering full -buffersize 4096 -encoding * -encodingprofile * -eofchar {{} {}} -translation {auto *}}} test iocmd-25.3 {chan configure, cgetall, regular result} -match glob -body { set res {} proc foo {args} { @@ -1402,7 +1404,7 @@ test iocmd-25.3 {chan configure, cgetall, regular result} -match glob -body { close $c rename foo {} set res -} -result {{cgetall rc*} {-blocking 1 -buffering full -buffersize 4096 -encoding * -eofchar {{} {}} -nocomplainencoding * -strictencoding 0 -translation {auto *} -bar foo -snarf x}} +} -result {{cgetall rc*} {-blocking 1 -buffering full -buffersize 4096 -encoding * -encodingprofile * -eofchar {{} {}} -translation {auto *} -bar foo -snarf x}} test iocmd-25.4 {chan configure, cgetall, bad result, list of uneven length} -match glob -body { set res {} proc foo {args} { diff --git a/tests/winConsole.test b/tests/winConsole.test index b04f3e9..62dfbf3 100644 --- a/tests/winConsole.test +++ b/tests/winConsole.test @@ -198,7 +198,7 @@ test console-fconfigure-get-1.0 { Console get stdin configuration } -constraints {win interactive} -body { lsort [dict keys [fconfigure stdin]] -} -result {-blocking -buffering -buffersize -encoding -eofchar -inputmode -translation} +} -result {-blocking -buffering -buffersize -encoding -encodingprofile -eofchar -inputmode -translation} set testnum 0 foreach {opt result} { @@ -224,7 +224,7 @@ test console-fconfigure-get-1.[incr testnum] { fconfigure -winsize } -constraints {win interactive} -body { fconfigure stdin -winsize -} -result {bad option "-winsize": should be one of -blocking, -buffering, -buffersize, -encoding, -eofchar, -nocomplainencoding, -strictencoding, -translation, or -inputmode} -returnCodes error +} -result {bad option "-winsize": should be one of -blocking, -buffering, -buffersize, -encoding, -encodingprofile, -eofchar, -translation, or -inputmode} -returnCodes error ## fconfigure get stdout/stderr foreach chan {stdout stderr} major {2 3} { @@ -232,7 +232,7 @@ foreach chan {stdout stderr} major {2 3} { win interactive } -body { lsort [dict keys [fconfigure $chan]] - } -result {-blocking -buffering -buffersize -encoding -eofchar -translation -winsize} + } -result {-blocking -buffering -buffersize -encoding -encodingprofile -eofchar -translation -winsize} set testnum 0 foreach {opt result} { -blocking 1 @@ -260,7 +260,7 @@ foreach chan {stdout stderr} major {2 3} { fconfigure -inputmode } -constraints {win interactive} -body { fconfigure $chan -inputmode - } -result {bad option "-inputmode": should be one of -blocking, -buffering, -buffersize, -encoding, -eofchar, -nocomplainencoding, -strictencoding, -translation, or -winsize} -returnCodes error + } -result {bad option "-inputmode": should be one of -blocking, -buffering, -buffersize, -encoding, -encodingprofile, -eofchar, -translation, or -winsize} -returnCodes error } @@ -330,7 +330,7 @@ test console-fconfigure-set-1.3 { fconfigure stdin -winsize } -constraints {win interactive} -body { fconfigure stdin -winsize {10 30} -} -result {bad option "-winsize": should be one of -blocking, -buffering, -buffersize, -encoding, -eofchar, -nocomplainencoding, -strictencoding, -translation, or -inputmode} -returnCodes error +} -result {bad option "-winsize": should be one of -blocking, -buffering, -buffersize, -encoding, -encodingprofile, -eofchar, -translation, or -inputmode} -returnCodes error ## fconfigure set stdout,stderr @@ -338,13 +338,13 @@ test console-fconfigure-set-2.0 { fconfigure stdout -winsize } -constraints {win interactive} -body { fconfigure stdout -winsize {10 30} -} -result {bad option "-winsize": should be one of -blocking, -buffering, -buffersize, -encoding, -eofchar, -nocomplainencoding, -strictencoding, or -translation} -returnCodes error +} -result {bad option "-winsize": should be one of -blocking, -buffering, -buffersize, -encoding, -encodingprofile, -eofchar, or -translation} -returnCodes error test console-fconfigure-set-3.0 { fconfigure stderr -winsize } -constraints {win interactive} -body { fconfigure stderr -winsize {10 30} -} -result {bad option "-winsize": should be one of -blocking, -buffering, -buffersize, -encoding, -eofchar, -nocomplainencoding, -strictencoding, or -translation} -returnCodes error +} -result {bad option "-winsize": should be one of -blocking, -buffering, -buffersize, -encoding, -encodingprofile, -eofchar, or -translation} -returnCodes error # Multiple threads diff --git a/tests/zlib.test b/tests/zlib.test index ebbdd50..272a663 100644 --- a/tests/zlib.test +++ b/tests/zlib.test @@ -292,7 +292,7 @@ test zlib-8.6 {transformation and fconfigure} -setup { } -cleanup { catch {close $fd} removeFile $file -} -result {{-blocking 1 -buffering full -buffersize 4096 -encoding binary -eofchar {} -nocomplainencoding 1 -strictencoding 0 -translation lf} {-blocking 1 -buffering full -buffersize 4096 -encoding binary -eofchar {} -nocomplainencoding 1 -strictencoding 0 -translation lf -checksum 1 -dictionary {}} {-blocking 1 -buffering full -buffersize 4096 -encoding binary -eofchar {} -nocomplainencoding 1 -strictencoding 0 -translation lf}} +} -result {{-blocking 1 -buffering full -buffersize 4096 -encoding binary -encodingprofile {} -eofchar {} -translation lf} {-blocking 1 -buffering full -buffersize 4096 -encoding binary -encodingprofile {} -eofchar {} -translation lf -checksum 1 -dictionary {}} {-blocking 1 -buffering full -buffersize 4096 -encoding binary -encodingprofile {} -eofchar {} -translation lf}} test zlib-8.7 {transformation and fconfigure} -setup { set file [makeFile {} test.gz] set fd [open $file wb] @@ -302,7 +302,7 @@ test zlib-8.7 {transformation and fconfigure} -setup { } -cleanup { catch {close $fd} removeFile $file -} -result {{-blocking 1 -buffering full -buffersize 4096 -encoding binary -eofchar {} -nocomplainencoding 1 -strictencoding 0 -translation lf} {-blocking 1 -buffering full -buffersize 4096 -encoding binary -eofchar {} -nocomplainencoding 1 -strictencoding 0 -translation lf -checksum 0} {-blocking 1 -buffering full -buffersize 4096 -encoding binary -eofchar {} -nocomplainencoding 1 -strictencoding 0 -translation lf}} +} -result {{-blocking 1 -buffering full -buffersize 4096 -encoding binary -encodingprofile {} -eofchar {} -translation lf} {-blocking 1 -buffering full -buffersize 4096 -encoding binary -encodingprofile {} -eofchar {} -translation lf -checksum 0} {-blocking 1 -buffering full -buffersize 4096 -encoding binary -encodingprofile {} -eofchar {} -translation lf}} # Input is headers from fetching SPDY draft # Dictionary is that which is proposed _in_ SPDY draft set spdyHeaders "HTTP/1.0 200 OK\r\nContent-Type: text/html; charset=utf-8\r\nX-Robots-Tag: noarchive\r\nLast-Modified: Tue, 05 Jun 2012 02:43:25 GMT\r\nETag: \"1338864205129|#public|0|en|||0\"\r\nExpires: Tue, 05 Jun 2012 16:17:11 GMT\r\nDate: Tue, 05 Jun 2012 16:17:06 GMT\r\nCache-Control: public, max-age=5\r\nX-Content-Type-Options: nosniff\r\nX-XSS-Protection: 1; mode=block\r\nServer: GSE\r\n" -- cgit v0.12 From 52fc9a970c0239d9f74fd6313920572315e757a7 Mon Sep 17 00:00:00 2001 From: pooryorick Date: Thu, 2 Feb 2023 22:51:26 +0000 Subject: Fix for [b8f575aa2398b0e4] and [154ed7ce564a7b4c], double-[read]/[gets] problem. Partial-read functionality commented out. --- generic/tclIOCmd.c | 6 +- tests/io.test | 450 ++++++++++++++++++++++++++++++++++------------------- 2 files changed, 297 insertions(+), 159 deletions(-) diff --git a/generic/tclIOCmd.c b/generic/tclIOCmd.c index 2eeb04c..5b47b08 100644 --- a/generic/tclIOCmd.c +++ b/generic/tclIOCmd.c @@ -331,14 +331,16 @@ Tcl_GetsObjCmd( "error reading \"%s\": %s", TclGetString(chanObjPtr), Tcl_PosixError(interp))); } + /* resultDictPtr = Tcl_NewDictObj(); Tcl_DictObjPut(NULL, resultDictPtr, Tcl_NewStringObj("read", -1) , linePtr); returnOptsPtr = Tcl_NewDictObj(); Tcl_DictObjPut(NULL, returnOptsPtr, Tcl_NewStringObj("-result", -1) , resultDictPtr); - code = TCL_ERROR; Tcl_SetReturnOptions(interp, returnOptsPtr); + */ + code = TCL_ERROR; goto done; } lineLen = TCL_INDEX_NONE; @@ -476,6 +478,7 @@ Tcl_ReadObjCmd( "error reading \"%s\": %s", TclGetString(chanObjPtr), Tcl_PosixError(interp))); } + /* resultDictPtr = Tcl_NewDictObj(); Tcl_DictObjPut(NULL, resultDictPtr, Tcl_NewStringObj("read", -1) , resultPtr); @@ -485,6 +488,7 @@ Tcl_ReadObjCmd( TclChannelRelease(chan); Tcl_DecrRefCount(resultPtr); Tcl_SetReturnOptions(interp, returnOptsPtr); + */ return TCL_ERROR; } diff --git a/tests/io.test b/tests/io.test index 3f00561..5bf5f10 100644 --- a/tests/io.test +++ b/tests/io.test @@ -1560,19 +1560,29 @@ apply [list {} { set f [open $path(test1)] fconfigure $f -encoding utf-8 @strict@ -buffersize 10 set status [catch {read $f} cres copts] - set in [dict get $copts -result] - lappend res $in + #set in [dict get $copts -result] + #lappend res $in lappend res $status $cres set status [catch {read $f} cres copts] - set in [dict get $copts -result] - lappend res $in + #set in [dict get $copts -result] + #lappend res $in lappend res $status $cres set res } -cleanup { catch {close $f} - } -match glob -result {{read aaaaaaaaa} 1\ + } -match glob\ + } + + #append template {\ + # -result {{read aaaaaaaaa} 1\ + # {error reading "*": illegal byte sequence}\ + # {read {}} 1 {error reading "*": illegal byte sequence}} + #} + + append template {\ + -result {1\ {error reading "*": illegal byte sequence}\ - {read {}} 1 {error reading "*": illegal byte sequence}} + 1 {error reading "*": illegal byte sequence}} } # strict encoding may be the default in Tcl 9, but in 8 it is not @@ -9070,48 +9080,83 @@ test io-75.5 {invalid utf-8 encoding read is ignored (-nocomplainencoding 1)} -s removeFile io-75.5 } -result 4181 -test io-75.6 {invalid utf-8 encoding read is not ignored (-strictencoding 1)} -setup { - set fn [makeFile {} io-75.6] - set f [open $fn w+] - fconfigure $f -encoding binary - # \x81 is invalid in utf-8 - puts -nonewline $f A\x81 - flush $f - seek $f 0 - fconfigure $f -encoding utf-8 -buffering none -eofchar "" -translation lf -strictencoding 1 -} -body { - set status [catch {read $f} cres copts] - set d [dict get $copts -result read] - binary scan $d H* hd - lappend hd $status $cres -} -cleanup { - close $f - removeFile io-75.6 -} -match glob -result {41 1 {error reading "*": illegal byte sequence}} -test io-75.7 {invalid utf-8 encoding eof handling (-strictencoding 1)} -setup { - set fn [makeFile {} io-75.7] - set f [open $fn w+] - fconfigure $f -encoding binary - # \xA1 is invalid in utf-8. -eofchar is not detected, because it comes later. - puts -nonewline $f A\xA1\x1A - flush $f - seek $f 0 - fconfigure $f -encoding utf-8 -buffering none -eofchar \x1A -translation lf -strictencoding 1 -} -body { - set status [catch {read $f} cres copts] - set d [dict get $copts -result read] - binary scan $d H* hd - lappend hd [eof $f] - lappend hd $status - lappend hd $cres - fconfigure $f -encoding iso8859-1 - lappend hd [read $f];# We changed encoding, so now we can read the \xA1 - close $f - set hd -} -cleanup { - removeFile io-75.7 -} -match glob -result {41 0 1 {error reading "*": illegal byte sequence} ¡} +apply [list {} { + + + set test { + test io-75.6 {invalid utf-8 encoding read is not ignored (-strictencoding 1)} -setup { + set hd {} + set fn [makeFile {} io-75.6] + set f [open $fn w+] + fconfigure $f -encoding binary + # \x81 is invalid in utf-8 + puts -nonewline $f A\x81 + flush $f + seek $f 0 + fconfigure $f -encoding utf-8 -buffering none -eofchar "" -translation lf -strictencoding 1 + } -body { + set status [catch {read $f} cres copts] + #set d [dict get $copts -result read] + #binary scan $d H* hd + lappend hd $status $cres + } -cleanup { + close $f + removeFile io-75.6 + } -match glob\ + } + + #append test {\ + # -result {41 1 {error reading "*": illegal byte sequence}} + #} + + append test {\ + -result {1 {error reading "*": illegal byte sequence}} + } + + uplevel 1 $test + + set test { + test io-75.7 {invalid utf-8 encoding eof handling (-strictencoding 1)} -setup { + set hd {} + set fn [makeFile {} io-75.7] + set f [open $fn w+] + fconfigure $f -encoding binary + # \xA1 is invalid in utf-8. -eofchar is not detected, because it comes later. + puts -nonewline $f A\xA1\x1A + flush $f + seek $f 0 + fconfigure $f -encoding utf-8 -buffering none -eofchar \x1A -translation lf -strictencoding 1 + } -body { + set status [catch {read $f} cres copts] + #set d [dict get $copts -result read] + #binary scan $d H* hd + lappend hd [eof $f] + lappend hd $status + lappend hd $cres + fconfigure $f -encoding iso8859-1 + lappend hd [read $f];# We changed encoding, so now we can read the \xA1 + close $f + set hd + } -cleanup { + removeFile io-75.7 + } -match glob\ + } + + #append test {\ + # -result {41 0 1 {error reading "*": illegal byte sequence} ¡} + #} + + append test {\ + -result {0 1 {error reading "*": illegal byte sequence} ¡} + } + + uplevel 1 $test + + +} [namespace current]] + + test io-75.8.incomplete { incomplete uft-8 char after eof char is not an error (-strictencoding 1) @@ -9198,76 +9243,124 @@ test io-75.10 {incomplete multibyte encoding read is ignored} -setup { } -result 41c0 -test io-75.10_strict {incomplete multibyte encoding read is an error} -setup { - set res {} - set fn [makeFile {} io-75.10] - set f [open $fn w+] - fconfigure $f -encoding binary - puts -nonewline $f A\xC0 - flush $f - seek $f 0 - fconfigure $f -encoding utf-8 -strictencoding 1 -buffering none -} -body { - set status [catch {read $f} cres copts] - set d [dict get $copts -result read] - binary scan $d H* hd - lappend res $hd $cres - chan configure $f -encoding iso8859-1 - set d [read $f] - binary scan $d H* hd - lappend res $hd - close $f - return $res -} -cleanup { - removeFile io-75.10 -} -match glob -result {41 {error reading "*": illegal byte sequence} c0} +apply [list {} { + set test { + test io-75.10_strict {incomplete multibyte encoding read is an error} -setup { + set res {} + set fn [makeFile {} io-75.10] + set f [open $fn w+] + fconfigure $f -encoding binary + puts -nonewline $f A\xC0 + flush $f + seek $f 0 + fconfigure $f -encoding utf-8 -strictencoding 1 -buffering none + } -body { + set status [catch {read $f} cres copts] + + #set d [dict get $copts -result read] + #binary scan $d H* hd + #lappend res $hd $cres + lappend res $cres + + chan configure $f -encoding iso8859-1 + + set d [read $f] + binary scan $d H* hd + lappend res $hd + close $f + return $res + } -cleanup { + removeFile io-75.10 + } -match glob\ + } + + #append test {\ + # -result {41 {error reading "*": illegal byte sequence} c0} + #} + + append test {\ + -result {{error reading "*": illegal byte sequence} c0} + } + + uplevel 1 $test + + + + set test { + # As utf-8 has a special treatment in multi-byte decoding, also test another + # one. + test io-75.11 {shiftjis encoding error read results in raw bytes} -setup { + set hd {} + set fn [makeFile {} io-75.11] + set f [open $fn w+] + fconfigure $f -encoding binary + # In shiftjis, \x81 starts a two-byte sequence. + # But 2nd byte \xFF is not allowed + puts -nonewline $f A\x81\xFFA + flush $f + seek $f 0 + fconfigure $f -encoding shiftjis -buffering none -eofchar "" \ + -translation lf -strictencoding 1 + } -body { + set status [catch {read $f} cres copts] + #set d [dict get $copts -result read] + #binary scan $d H* hd + lappend hd $status + lappend hd $cres + } -cleanup { + close $f + removeFile io-75.11 + } -match glob + } -# As utf-8 has a special treatment in multi-byte decoding, also test another -# one. -test io-75.11 {shiftjis encoding error read results in raw bytes} -setup { - set fn [makeFile {} io-75.11] - set f [open $fn w+] - fconfigure $f -encoding binary - # In shiftjis, \x81 starts a two-byte sequence. - # But 2nd byte \xFF is not allowed - puts -nonewline $f A\x81\xFFA - flush $f - seek $f 0 - fconfigure $f -encoding shiftjis -buffering none -eofchar "" \ - -translation lf -strictencoding 1 -} -body { - set status [catch {read $f} cres copts] - set d [dict get $copts -result read] - binary scan $d H* hd - lappend hd $status - lappend hd $cres -} -cleanup { - close $f - removeFile io-75.11 -} -match glob -result {41 1 {error reading "*": illegal byte sequence}} + #append test {\ + # -result {41 1 {error reading "*": illegal byte sequence}} + #} + append test {\ + -result {1 {error reading "*": illegal byte sequence}} + } -test io-75.12 {invalid utf-8 encoding read is an error} -setup { - set res {} - set fn [makeFile {} io-75.12] - set f [open $fn w+] - fconfigure $f -encoding binary - puts -nonewline $f A\x81 - flush $f - seek $f 0 - fconfigure $f -encoding utf-8 -buffering none -eofchar {} -translation lf \ + + set test { + test io-75.12 {invalid utf-8 encoding read is an error} -setup { + set hd {} + set res {} + set fn [makeFile {} io-75.12] + set f [open $fn w+] + fconfigure $f -encoding binary + puts -nonewline $f A\x81 + flush $f + seek $f 0 + fconfigure $f -encoding utf-8 -buffering none -eofchar {} -translation lf \ -strictencoding 1 -} -body { - set status [catch {read $f} cres copts] - set d [dict get $copts -result read] - close $f - binary scan $d H* hd - lappend res $hd $status $cres - return $res -} -cleanup { - removeFile io-75.12 -} -match glob -result {41 1 {error reading "*": illegal byte sequence}} + } -body { + set status [catch {read $f} cres copts] + #set d [dict get $copts -result read] + #binary scan $d H* hd + #lappend res $hd + lappend res $status $cres + return $res + } -cleanup { + catch {close $f} + removeFile io-75.12 + } -match glob\ + } + + #append test {\ + # -result {41 1 {error reading "*": illegal byte sequence}} + #} + + + append test {\ + -result {1 {error reading "*": illegal byte sequence}} + } + + uplevel 1 $test +} [namespace current]] + + test io-75.12_ignore {invalid utf-8 encoding read is ignored} -setup { set fn [makeFile {} io-75.12] set f [open $fn w+] @@ -9285,25 +9378,49 @@ test io-75.12_ignore {invalid utf-8 encoding read is ignored} -setup { } -cleanup { removeFile io-75.12 } -result 4181 -test io-75.13 {invalid utf-8 encoding read is not ignored (-strictencoding 1)} -setup { - set fn [makeFile {} io-75.13] - set f [open $fn w+] - fconfigure $f -encoding binary - # \x81 is invalid in utf-8 - puts -nonewline $f "A\x81" - flush $f - seek $f 0 - fconfigure $f -encoding utf-8 -buffering none -eofchar "" -translation lf -strictencoding 1 -} -body { - set status [catch {read $f} cres copts] - set d [dict get $copts -result read] - binary scan $d H* hd - lappend hd $status - close $f - lappend hd $cres -} -cleanup { - removeFile io-75.13 -} -match glob -result {41 1 {error reading "*": illegal byte sequence}} + + +apply [list {} { + + set test { + test io-75.13 {invalid utf-8 encoding read is not ignored (-strictencoding 1)} -setup { + set hd {} + set fn [makeFile {} io-75.13] + set f [open $fn w+] + fconfigure $f -encoding binary + # \x81 is invalid in utf-8 + puts -nonewline $f A\x81 + flush $f + seek $f 0 + fconfigure $f -encoding utf-8 -buffering none -eofchar "" \ + -translation lf -strictencoding 1 + } -body { + set status [catch {read $f} cres copts] + #set d [dict get $copts -result read] + #binary scan $d H* hd + lappend hd $status + lappend hd $cres + } -cleanup { + catch {close $f} + removeFile io-75.13 + } -match glob\ + } + + #append test {\ + # -result {41 1 {error reading "*": illegal byte sequence}} + #} + + append test {\ + -result {1 {error reading "*": illegal byte sequence}} + } + + uplevel 1 $test + + set test { + } + +} [namespace current]] + test io-75.14 {invalid utf-8 encoding [gets] coninues in non-strict mode after error} -setup { set res {} @@ -9329,34 +9446,51 @@ test io-75.14 {invalid utf-8 encoding [gets] coninues in non-strict mode after e } -match glob -result {a 1 {error reading "*": illegal byte sequence} bÀ c} -test io-75.15 {invalid utf-8 encoding strict gets should not hang} -setup { - set res {} - set fn [makeFile {} io-75.15] - set chan [open $fn w+] - fconfigure $chan -encoding binary - # This is not valid UTF-8 - puts $chan hello\nAB\xc0\x40CD\nEFG - close $chan -} -body { - #Now try to read it with [gets] - set chan [open $fn] - fconfigure $chan -encoding utf-8 -strictencoding 1 - lappend res [gets $chan] - set status [catch {gets $chan} cres copts] - lappend res $status $cres - set status [catch {gets $chan} cres copts] - lappend res $status $cres - lappend res [dict get $copts -result] - chan configur $chan -encoding binary - foreach char [split [read $chan 2] {}] { - lappend res [format %x [scan $char %c]] + +apply [list {} { + set test { + test io-75.15 {invalid utf-8 encoding strict gets should not hang} -setup { + set res {} + set fn [makeFile {} io-75.15] + set chan [open $fn w+] + fconfigure $chan -encoding binary + # This is not valid UTF-8 + puts $chan hello\nAB\xc0\x40CD\nEFG + close $chan + } -body { + #Now try to read it with [gets] + set chan [open $fn] + fconfigure $chan -encoding utf-8 -strictencoding 1 + lappend res [gets $chan] + set status [catch {gets $chan} cres copts] + lappend res $status $cres + set status [catch {gets $chan} cres copts] + lappend res $status $cres + #lappend res [dict get $copts -result] + chan configur $chan -encoding binary + foreach char [split [read $chan 2] {}] { + lappend res [format %x [scan $char %c]] + } + return $res + } -cleanup { + close $chan + removeFile io-75.15 + } -match glob\ } - return $res -} -cleanup { - close $chan - removeFile io-75.15 -} -match glob -result {hello 1 {error reading "*": illegal byte sequence}\ - 1 {error reading "*": illegal byte sequence} {read AB} c0 40} + + #append test {\ + # -result {hello 1 {error reading "*": illegal byte sequence}\ + # 1 {error reading "*": illegal byte sequence} {read AB} c0 40} + #} + + append test {\ + -result {hello 1 {error reading "*": illegal byte sequence}\ + 1 {error reading "*": illegal byte sequence} c0 40} + } + + uplevel 1 $test + +} [namespace current]] test io-76.0 {channel modes} -setup { -- cgit v0.12 From f238eb1dbc93130d15f8b4e7dd32602c1870794a Mon Sep 17 00:00:00 2001 From: pooryorick Date: Sat, 4 Feb 2023 00:28:12 +0000 Subject: Fix test io-75.14. --- tests/io.test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/io.test b/tests/io.test index 0f62a4f..75255ca 100644 --- a/tests/io.test +++ b/tests/io.test @@ -9428,7 +9428,7 @@ test io-75.14 { set res {} set fn [makeFile {} io-75.14] set f [open $fn w+] - fconfigure $f -encoding binary + fconfigure $f -translation binary # \xc0 is invalid in utf-8 puts -nonewline $f a\nb\xc0\nc\n flush $f -- cgit v0.12 From f5f5ff4257a24b2e8a8d96c820f6874c86e81304 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Mon, 6 Feb 2023 22:43:30 +0000 Subject: Proposed fix for [10c2c17c32]: UTF-LE32 encoder mapping of surrogates. TODO: testcase --- generic/tclEncoding.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 288b07c..d19e237 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -2582,6 +2582,10 @@ Utf32ToUtfProc( *dst++ = (ch & 0xFF); } else { dst += Tcl_UniCharToUtf(ch, dst); + if ((ch & ~0x3FF) == 0xD800) { + /* Bug [10c2c17c32]. If Hi surrogate, finish 3-byte UTF-8 */ + dst += Tcl_UniCharToUtf(-1, dst); + } } src += sizeof(unsigned int); } -- cgit v0.12 From 694ae1913191cf93072702e7612b88544f7bea54 Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Tue, 7 Feb 2023 11:22:08 +0000 Subject: Fix call to EncodingConvertParseOption for decoding --- generic/tclCmdAH.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/generic/tclCmdAH.c b/generic/tclCmdAH.c index 9165fda..02a3a46 100644 --- a/generic/tclCmdAH.c +++ b/generic/tclCmdAH.c @@ -688,7 +688,7 @@ EncodingConvertfromObjCmd( Tcl_Obj *failVarObj; if (EncodingConvertParseOptions( - interp, objc, objv, 1, &encoding, &data, &flags, &failVarObj) + interp, objc, objv, 0, &encoding, &data, &flags, &failVarObj) != TCL_OK) { return TCL_ERROR; } -- cgit v0.12 From e0ee29b9b606d2a3872ddf7f04332ba62433ae32 Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Tue, 7 Feb 2023 11:23:52 +0000 Subject: Refactor encoding tests for broader coverage and easier test case management --- tests/cmdAH.test | 538 +++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 343 insertions(+), 195 deletions(-) diff --git a/tests/cmdAH.test b/tests/cmdAH.test index d7a3657..22dc2a4 100644 --- a/tests/cmdAH.test +++ b/tests/cmdAH.test @@ -171,239 +171,387 @@ test cmdAH-3.2 {Tcl_ContinueObjCmd, success} { list [catch {continue} msg] $msg } {4 {}} -test cmdAH-4.1 {Tcl_EncodingObjCmd} -returnCodes error -body { +### +# encoding command + +set "numargErrors(encoding system)" {^wrong # args: should be "(encoding |::tcl::encoding::)system \?encoding\?"$} +set "numargErrors(encoding convertfrom)" {^wrong # args: should be "(encoding |::tcl::encoding::)convertfrom \?\?-profile profile\? \?-failindex var\? \?encoding\?\? data"$} +set "numargErrors(encoding convertto)" {^wrong # args: should be "(encoding |::tcl::encoding::)convertto \?\?-profile profile\? \?-failindex var\? \?encoding\?\? data"$} +set "numargErrors(encoding names)" {wrong # args: should be "encoding names"} + +set encProfiles {tcl8 strict} + +# TODO - valid sequences for different encodings - shiftjis etc. +# Note utf-16, utf-32 missing because they are automatically +# generated based on le/be versions. +set encValidStrings { + ascii ABC \x41\x42\x43 + utf-8 A\u0000\u03A9\u8A9E\U00010384 \x41\x00\xCE\xA9\xE8\xAA\x9E\xF0\x90\x8E\x84 + utf-16le A\u0000\u03A9\u8A9E\U00010384 \x41\x00\x00\x00\xA9\x03\x9E\x8A\x00\xD8\x84\xDF + utf-16be A\u0000\u03A9\u8A9E\U00010384 \x00\x41\x00\x00\x03\xA9\x8A\x9E\xD8\x00\xDF\x84 + utf-32le A\u0000\u03A9\u8A9E\U00010384 \x41\x00\x00\x00\x00\x00\x00\x00\xA9\x03\x00\x00\x9E\x8A\x00\x00\x84\x03\x01\x00 + utf-32be A\u0000\u03A9\u8A9E\U00010384 \x00\x00\x00\x41\x00\x00\x00\x00\x00\x00\x03\xA9\x00\x00\x8A\x9E\x00\x01\x03\x84 +} + +# Invalid byte sequences {encoding bytes profile prefix failindex tag} +# Note tag is used in test id generation as well. The combination +# should be unique for test ids to be unique. +# Note utf-16, utf-32 missing because they are automatically +# generated based on le/be versions. +# TODO - other encodings and test cases +set encInvalidBytes { + ascii \x41\xe9\x42 default A\u00E9B -1 {non-ASCII} + ascii \x41\xe9\x42 tcl8 A\u00E9B -1 {non-ASCII} + ascii \x41\xe9\x42 strict A 1 {non-ASCII} + + utf-8 \x41\xC0\x42 default A\u00C0B -1 C0 + utf-8 \x41\xC0\x42 tcl8 A\u00C0B -1 C0 + utf-8 \x41\xC0\x42 strict A 1 C0 + utf-8 \x41\x80\x42 default A\u0080B -1 80 + utf-8 \x41\x80\x42 tcl8 A\u0080B -1 80 + utf-8 \x41\x80\x42 strict A 1 80 + utf-8 \x41\xC0\x80\x42 default A\u0000B -1 C080 + utf-8 \x41\xC0\x80\x42 tcl8 A\u0000B -1 C080 + utf-8 \x41\xC0\x80\x42 strict A 1 C080 + utf-8 \x41\xC1\x42 default A\u00C1B -1 C1 + utf-8 \x41\xC1\x42 tcl8 A\u00C1B -1 C1 + utf-8 \x41\xC1\x42 strict A 1 C1 + utf-8 \x41\xC2\x42 default A\u00C2B -1 C2-nontrail + utf-8 \x41\xC2\x42 tcl8 A\u00C2B -1 C2-nontrail + utf-8 \x41\xC2\x42 strict A 1 C2-nontrail + utf-8 \x41\xC2 default A\u00C2 -1 C2-incomplete + utf-8 \x41\xC2 tcl8 A\u00C2 -1 C2-incomplete + utf-8 \x41\xC2 strict A 1 C2-incomplete + utf-8 A\xed\xa0\x80B default A\uD800B -1 High-surrogate + utf-8 A\xed\xa0\x80B tcl8 A\uD800B -1 High-surrogate + utf-8 A\xed\xa0\x80B strict A 1 High-surrogate + utf-8 A\xed\xb0\x80B default A\uDC00B -1 Low-surrogate + utf-8 A\xed\xb0\x80B tcl8 A\uDC00B -1 Low-surrogate + utf-8 A\xed\xb0\x80B strict A 1 Low-surrogate + + utf-32le \x41\x00\x00\x00\x00\xD8\x00\x00\x42\x00\x00\x00 default A\uD800B -1 {High-surrogate} + utf-32le \x41\x00\x00\x00\x00\xD8\x00\x00\x42\x00\x00\x00 tcl8 A\uD800B -1 {High-surrogate} + utf-32le \x41\x00\x00\x00\x00\xD8\x00\x00\x42\x00\x00\x00 strict A 4 {High-surrogate} +} + +# Strings that cannot be encoded for specific encoding / profiles +# {encoding string profile bytes failindex tag} +# Note tag is used in test id generation as well. The combination +# should be unique for test ids to be unique. +# Note utf-16, utf-32 missing because they are automatically +# generated based on le/be versions. +# TODO - other encodings and test cases +# TODO - out of range code point (note cannot be generated by \U notation) +set encUnencodableStrings { + ascii A\u00e0B default \x41\x3f\x42 -1 non-ASCII + ascii A\u00e0B tcl8 \x41\x3f\x42 -1 non-ASCII + ascii A\u00e0B strict \x41 1 non-ASCII + + iso8859-1 A\u0141B default \x41\x3f\x42 -1 unencodable + iso8859-1 A\u0141B tcl8 \x41\x3f\x42 -1 unencodable + iso8859-1 A\u0141B strict \x41 1 unencodable + + utf-8 A\uD800B default \x41\xed\xa0\x80\x42 -1 High-surrogate + utf-8 A\uD800B tcl8 \x41\xed\xa0\x80\x42 -1 High-surrogate + utf-8 A\uD800B strict \x41 1 High-surrogate + utf-8 A\uDC00B default \x41\xed\xb0\x80\x42 -1 High-surrogate + utf-8 A\uDC00B tcl8 \x41\xed\xb0\x80\x42 -1 High-surrogate + utf-8 A\uDC00B strict \x41 1 High-surrogate +} + +if {$::tcl_platform(byteOrder) eq "littleEndian"} { + set endian le +} else { + set endian be +} + +# +# Check errors for invalid number of arguments +proc badnumargs {id cmd cmdargs} { + variable numargErrors + test $id.a "Syntax error: $cmd $cmdargs" \ + -body [list {*}$cmd {*}$cmdargs] \ + -result $numargErrors($cmd) \ + -match regexp \ + -returnCodes error + test $id.b "Syntax error: $cmd (byte compiled)" \ + -setup [list proc compiled_proc {} [list {*}$cmd {*}$cmdargs]] \ + -body {compiled_proc} \ + -cleanup {rename compiled_proc {}} \ + -result $numargErrors($cmd) \ + -match regexp \ + -returnCodes error +} + +# Wraps tests resulting in unknown encoding errors +proc unknownencodingtest {id cmd} { + set result "unknown encoding \"[lindex $cmd end-1]\"" + test $id.a "Unknown encoding error: $cmd" \ + -body [list encoding {*}$cmd] \ + -result $result \ + -returnCodes error + test $id.b "Syntax error: $cmd (byte compiled)" \ + -setup [list proc encoding_test {} [list encoding {*}$cmd]] \ + -body {encoding_test} \ + -cleanup {rename encoding_test {}} \ + -result $result \ + -returnCodes error +} + +# Wraps tests for conversion, successful or not. +# Really more general than just for encoding conversion. +proc testconvert {id body result args} { + test $id.a $body \ + -body $body \ + -result $result \ + {*}$args + dict append args -setup \n[list proc compiled_script {} $body] + dict append args -cleanup "\nrename compiled_script {}" + test $id.b "$body (byte compiled)" \ + -body {compiled_script} \ + -result $result \ + {*}$args +} + +test cmdAH-4.1.1 {encoding} -returnCodes error -body { encoding } -result {wrong # args: should be "encoding subcommand ?arg ...?"} -test cmdAH-4.2 {Tcl_EncodingObjCmd} -returnCodes error -body { +test cmdAH-4.1.2 {Tcl_EncodingObjCmd} -returnCodes error -body { encoding foo } -result {unknown or ambiguous subcommand "foo": must be convertfrom, convertto, dirs, names, or system} -test cmdAH-4.3 {Tcl_EncodingObjCmd} -returnCodes error -body { - encoding convertto -} -result {wrong # args: should be "encoding convertto ?-strict? ?-failindex var? ?encoding? data" or "encoding convertto -nocomplain ?encoding? data"} -test cmdAH-4.4 {Tcl_EncodingObjCmd} -returnCodes error -body { - encoding convertto foo bar -} -result {unknown encoding "foo"} -test cmdAH-4.5 {Tcl_EncodingObjCmd} -setup { - set system [encoding system] -} -body { - encoding system jis0208 - encoding convertto 乎 -} -cleanup { - encoding system $system -} -result 8C -test cmdAH-4.6 {Tcl_EncodingObjCmd} -setup { + +# +# encoding system 4.2.* +badnumargs cmdAH-4.2.1 {encoding system} {ascii ascii} +test cmdAH-4.2.2 {Tcl_EncodingObjCmd} -setup { set system [encoding system] } -body { encoding system iso8859-1 - encoding convertto jis0208 乎 -} -cleanup { - encoding system $system -} -result 8C -test cmdAH-4.7 {Tcl_EncodingObjCmd} -returnCodes error -body { - encoding convertfrom -} -result {wrong # args: should be "encoding convertfrom ?-strict? ?-failindex var? ?encoding? data" or "encoding convertfrom -nocomplain ?encoding? data"} -test cmdAH-4.8 {Tcl_EncodingObjCmd} -returnCodes error -body { - encoding convertfrom foo bar -} -result {unknown encoding "foo"} -test cmdAH-4.9 {Tcl_EncodingObjCmd} -setup { - set system [encoding system] -} -body { - encoding system jis0208 - encoding convertfrom 8C + encoding system } -cleanup { encoding system $system -} -result 乎 -test cmdAH-4.10 {Tcl_EncodingObjCmd} -setup { +} -result iso8859-1 + +# +# encoding convertfrom 4.3.* + +# Odd number of args is always invalid since last two args +# are ENCODING DATA and all options take a value +badnumargs cmdAH-4.3.1 {encoding convertfrom} {} +badnumargs cmdAH-4.3.2 {encoding convertfrom} {-failindex VAR ABC} +badnumargs cmdAH-4.3.3 {encoding convertfrom} {-profile VAR ABC} +badnumargs cmdAH-4.3.4 {encoding convertfrom} {-failindex VAR -profile strict ABC} +badnumargs cmdAH-4.3.5 {encoding convertfrom} {-profile strict -failindex VAR ABC} + +# Test that last two args always treated as ENCODING DATA +unknownencodingtest 4.3.6 {convertfrom -failindex ABC} +unknownencodingtest 4.3.7 {convertfrom -profile ABC} +unknownencodingtest 4.3.8 {convertfrom nosuchencoding ABC} +unknownencodingtest 4.3.9 {convertfrom -failindex VAR -profile ABC} +unknownencodingtest 4.3.10 {convertfrom -profile strict -failindex ABC} +testconvert cmdAH-4.3.11 { + encoding convertfrom jis0208 \x38\x43 +} \u4e4e -setup { set system [encoding system] -} -body { encoding system iso8859-1 - encoding convertfrom jis0208 8C } -cleanup { encoding system $system -} -result 乎 -test cmdAH-4.11 {Tcl_EncodingObjCmd} -returnCodes error -body { - encoding names foo -} -result {wrong # args: should be "encoding names"} -test cmdAH-4.12 {Tcl_EncodingObjCmd} -returnCodes error -body { - encoding system foo bar -} -result {wrong # args: should be "encoding system ?encoding?"} -test cmdAH-4.13 {Tcl_EncodingObjCmd} -setup { +} + +# Verify single arg defaults to system encoding +testconvert cmdAH-4.3.12 { + encoding convertfrom \x38\x43 +} \u4e4e -setup { set system [encoding system] -} -body { - encoding system iso8859-1 - encoding system + encoding system jis0208 } -cleanup { encoding system $system -} -result iso8859-1 +} -test cmdAH-4.14.1 {Syntax error, -nocomplain and -failindex, no encoding} -body { - encoding convertfrom -nocomplain -failindex 2 ABC -} -returnCodes 1 -result {wrong # args: should be "encoding convertfrom ?-strict? ?-failindex var? ?encoding? data" or "encoding convertfrom -nocomplain ?encoding? data"} -test cmdAH-4.14.2 {Syntax error, -nocomplain and -failindex, no encoding} -body { - encoding convertto -nocomplain -failindex 2 ABC -} -returnCodes 1 -result {wrong # args: should be "encoding convertto ?-strict? ?-failindex var? ?encoding? data" or "encoding convertto -nocomplain ?encoding? data"} -test cmdAH-4.15.1 {Syntax error, -failindex and -nocomplain, no encoding} -body { - encoding convertfrom -failindex 2 -nocomplain ABC -} -returnCodes 1 -result {unknown encoding "-nocomplain"} -test cmdAH-4.15.2 {Syntax error, -failindex and -nocomplain, no encoding} -body { - encoding convertto -failindex 2 -nocomplain ABC -} -returnCodes 1 -result {unknown encoding "-nocomplain"} -test cmdAH-4.16.1 {Syntax error, -nocomplain and -failindex, encoding} -body { - encoding convertfrom -nocomplain -failindex 2 utf-8 ABC -} -returnCodes 1 -result {wrong # args: should be "encoding convertfrom ?-strict? ?-failindex var? ?encoding? data" or "encoding convertfrom -nocomplain ?encoding? data"} -test cmdAH-4.16.2 {Syntax error, -nocomplain and -failindex, encoding} -body { - encoding convertto -nocomplain -failindex 2 utf-8 ABC -} -returnCodes 1 -result {wrong # args: should be "encoding convertto ?-strict? ?-failindex var? ?encoding? data" or "encoding convertto -nocomplain ?encoding? data"} -test cmdAH-4.17.1 {Syntax error, -failindex and -nocomplain, encoding} -body { - encoding convertfrom -failindex 2 -nocomplain utf-8 ABC -} -returnCodes 1 -result {wrong # args: should be "encoding convertfrom ?-strict? ?-failindex var? ?encoding? data" or "encoding convertfrom -nocomplain ?encoding? data"} -test cmdAH-4.17.2 {Syntax error, -failindex and -nocomplain, encoding} -body { - encoding convertto -failindex 2 -nocomplain utf-8 ABC -} -returnCodes 1 -result {wrong # args: should be "encoding convertto ?-strict? ?-failindex var? ?encoding? data" or "encoding convertto -nocomplain ?encoding? data"} -test cmdAH-4.18.1 {Syntax error, -failindex with no var, no encoding} -body { - encoding convertfrom -failindex ABC -} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertfrom ?-strict? ?-failindex var? ?encoding? data" or "::tcl::encoding::convertfrom -nocomplain ?encoding? data"} -test cmdAH-4.18.2 {Syntax error, -failindex with no var, no encoding (byte compiled)} -setup { - proc encoding_test {} { - encoding convertfrom -failindex ABC +# Wrapper for verifying -failindex +proc testfailindex {id converter enc data result {profile default}} { + if {$profile eq "default"} { + testconvert $id "list \[encoding $converter -failindex idx $enc $data] \[set idx]" $result + } else { + testconvert $id "list \[encoding $converter -profile $profile -failindex idx $enc $data] \[set idx]" $result } -} -body { - # Compile and execute - encoding_test -} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertfrom ?-strict? ?-failindex var? ?encoding? data" or "::tcl::encoding::convertfrom -nocomplain ?encoding? data"} -cleanup { - rename encoding_test "" } -test cmdAH-4.18.3 {Syntax error, -failindex with no var, no encoding} -body { - encoding convertto -failindex ABC -} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertto ?-strict? ?-failindex var? ?encoding? data" or "::tcl::encoding::convertto -nocomplain ?encoding? data"} -test cmdAH-4.18.4 {Syntax error, -failindex with no var, no encoding (byte compiled)} -setup { - proc encoding_test {} { - encoding convertto -failindex ABC + +# -failindex - valid data +foreach {enc string bytes} $encValidStrings { + testfailindex 4.3.13.$enc convertfrom $enc $bytes [list $string -1] + if {"utf-16$endian" eq $enc} { + # utf-16le ->utf-16, utf-32be -> utf32 etc. + set enc [string range $enc 0 5] + testfailindex 4.3.13.$enc convertfrom $enc $bytes [list $string -1] } -} -body { - # Compile and execute - encoding_test -} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertto ?-strict? ?-failindex var? ?encoding? data" or "::tcl::encoding::convertto -nocomplain ?encoding? data"} -cleanup { - rename encoding_test "" } -test cmdAH-4.19.1 {convertrom -failindex with correct data} -body { - encoding convertfrom -failindex test ABC - set test -} -returnCodes 0 -result -1 -test cmdAH-4.19.2 {convertrom -failindex with correct data (byt compiled)} -setup { - proc encoding_test {} { - encoding convertfrom -failindex test ABC - set test + +# -failindex - invalid data +foreach {enc bytes profile prefix failidx tag} $encInvalidBytes { + testfailindex 4.3.14.$enc.$profile.$tag convertfrom $enc $bytes [list $prefix $failidx] $profile + if {"utf-16$endian" eq $enc} { + # utf-16le ->utf-16, utf-32be -> utf32 etc. + set enc [string range $enc 0 5] + testfailindex 4.3.14.$enc.$profile.$tag convertfrom $enc $bytes [list $prefix $failidx] $profile } -} -body { - # Compile and execute - encoding_test -} -returnCodes 0 -result -1 -cleanup { - rename encoding_test "" } -test cmdAH-4.19.3 {convertrom -failindex with correct data} -body { - encoding convertto -failindex test ABC - set test -} -returnCodes 0 -result -1 -test cmdAH-4.19.4 {convertrom -failindex with correct data (byt compiled)} -setup { - proc encoding_test {} { - encoding convertto -failindex test ABC - set test + +# -profile + +# All valid byte sequences should be accepted by all profiles +foreach profile $encProfiles { + set i 0 + foreach {enc string bytes} $encValidStrings { + testconvert 4.3.15.$enc.$profile.[incr i] [list encoding convertfrom $enc $bytes] $string + if {"utf-16$endian" eq $enc} { + # utf-16le ->utf-16, utf-32be -> utf32 etc. + set enc [string range $enc 0 5] + testconvert 4.3.15.$enc.$profile.[incr i] [list encoding convertfrom $enc $bytes] $string + } } -} -body { - # Compile and execute - encoding_test -} -returnCodes 0 -result -1 -cleanup { - rename encoding_test "" } -test cmdAH-4.20.1 {convertrom -failindex with incomplete utf8} -body { - set x [encoding convertfrom -failindex i utf-8 A\xc3] - binary scan $x H* y - list $y $i -} -returnCodes 0 -result {41 1} -test cmdAH-4.20.2 {convertrom -failindex with incomplete utf8 (byte compiled)} -setup { - proc encoding_test {} { - set x [encoding convertfrom -failindex i utf-8 A\xc3] - binary scan $x H* y - list $y $i + +# Cycle through the various combinations of encodings and profiles +# for invalid byte sequences +foreach {enc bytes profile prefix failidx tag} $encInvalidBytes { + if {$failidx eq -1} { + set result [list $prefix] + } else { + set badbyte "'\\x[string toupper [binary encode hex [string index $bytes $failidx]]]'" + # TODO - if the bad byte is unprintable, tcltest errors out when printing a mismatch + # so glob it out for now. + set result [list "unexpected byte sequence starting at index $failidx: *" -returnCodes error -match glob] } -} -body { - # Compile and execute - encoding_test -} -returnCodes 0 -result {41 1} -cleanup { - rename encoding_test "" -} -test cmdAH-4.20.3 {convertrom -failindex with incomplete utf8} -body { - set x [encoding convertfrom -strict -failindex i utf-8 A\xc3] - binary scan $x H* y - list $y $i -} -returnCodes 0 -result {41 1} -test cmdAH-4.20.4 {convertrom -failindex with incomplete utf8 (byte compiled)} -setup { - proc encoding_test {} { - set x [encoding convertfrom -strict -failindex i utf-8 A\xc3] - binary scan $x H* y - list $y $i + if {$profile eq "default"} { + testconvert 4.3.15.$enc.$profile.$tag [list encoding convertfrom $enc $bytes] {*}$result + if {"utf-16$endian" eq $enc} { + # utf-16le ->utf-16, utf-32be -> utf32 etc. + set enc [string range $enc 0 5] + testconvert 4.3.15.$enc.$profile.$tag [list encoding convertfrom $enc $bytes] {*}$result + } + } else { + testconvert 4.3.15.$enc.$profile.$tag [list encoding convertfrom -profile $profile $enc $bytes] {*}$result + if {"utf-16$endian" eq $enc} { + # utf-16le ->utf-16, utf-32be -> utf32 etc. + set enc [string range $enc 0 5] + testconvert 4.3.15.$enc.$profile.$tag [list encoding convertfrom -profile $profile $enc $bytes] {*}$result + } } -} -body { - # Compile and execute - encoding_test -} -returnCodes 0 -result {41 1} -cleanup { - rename encoding_test "" } -test cmdAH-4.20.5 {convertrom -failindex with incomplete utf8} -body { - set x [encoding convertfrom -failindex i -strict utf-8 A\xc3] - binary scan $x H* y - list $y $i -} -returnCodes 0 -result {41 1} -test cmdAH-4.20.6 {convertrom -failindex with incomplete utf8 (byte compiled)} -setup { - proc encoding_test {} { - set x [encoding convertfrom -failindex i -strict utf-8 A\xc3] - binary scan $x H* y - list $y $i + +# +# encoding convertto 4.4.* + +badnumargs cmdAH-4.4.1 {encoding convertto} {} +badnumargs cmdAH-4.4.2 {encoding convertto} {-failindex VAR ABC} +badnumargs cmdAH-4.4.3 {encoding convertto} {-profile VAR ABC} +badnumargs cmdAH-4.4.4 {encoding convertto} {-failindex VAR -profile strict ABC} +badnumargs cmdAH-4.4.5 {encoding convertto} {-profile strict -failindex VAR ABC} + +# Test that last two args always treated as ENCODING DATA +unknownencodingtest 4.4.6 {convertto -failindex ABC} +unknownencodingtest 4.4.7 {convertto -profile ABC} +unknownencodingtest 4.4.8 {convertto nosuchencoding ABC} +unknownencodingtest 4.4.9 {convertto -failindex VAR -profile ABC} +unknownencodingtest 4.4.10 {convertto -profile strict -failindex ABC} +testconvert cmdAH-4.4.11 { + encoding convertto jis0208 \u4e4e +} \x38\x43 -setup { + set system [encoding system] + encoding system iso8859-1 +} -cleanup { + encoding system $system +} + +# Verify single arg defaults to system encoding +testconvert cmdAH-4.4.12 { + encoding convertto \u4e4e +} \x38\x43 -setup { + set system [encoding system] + encoding system jis0208 +} -cleanup { + encoding system $system +} + +# -failindex - valid data +foreach {enc string bytes} $encValidStrings { + testfailindex 4.4.13.$enc convertto $enc $string [list $bytes -1] + if {"utf-16$endian" eq $enc} { + # utf-16le ->utf-16, utf-32be -> utf32 etc. + set enc [string range $enc 0 5] + testfailindex 4.4.13.$enc convertto $enc $string [list $bytes -1] } -} -body { - # Compile and execute - encoding_test -} -returnCodes 0 -result {41 1} -cleanup { - rename encoding_test "" } -test cmdAH-4.21.1 {convertto -failindex with wrong character} -body { - set x [encoding convertto -failindex i iso8859-1 A\u0141] - binary scan $x H* y - list $y $i -} -returnCodes 0 -result {41 1} -test cmdAH-4.21.2 {convertto -failindex with wrong character (byte compiled)} -setup { - proc encoding_test {} { - set x [encoding convertto -failindex i iso8859-1 A\u0141] - binary scan $x H* y - list $y $i + +# -failindex - invalid data +foreach {enc string profile bytes failidx tag} $encUnencodableStrings { + testfailindex 4.4.14.$enc.$profile.$tag convertto $enc $string [list $bytes $failidx] $profile + if {"utf-16$endian" eq $enc} { + # utf-16le ->utf-16, utf-32be -> utf32 etc. + set enc [string range $enc 0 5] + testfailindex 4.4.14.$enc.$profile.$tag convertto $enc $string [list $bytes $failidx] $profile } -} -body { - # Compile and execute - encoding_test -} -returnCodes 0 -result {41 1} -cleanup { - rename encoding_test "" } -test cmdAH-4.22 {convertfrom -strict} -body { - encoding convertfrom -strict utf-8 A\x00B -} -result A\x00B -test cmdAH-4.23 {convertfrom -strict} -body { - encoding convertfrom -strict utf-8 A\xC0\x80B -} -returnCodes error -result {unexpected byte sequence starting at index 1: '\xC0'} +# -profile -test cmdAH-4.24 {convertto -strict} -body { - encoding convertto -strict utf-8 A\x00B -} -result A\x00B +# All valid byte sequences should be accepted by all profiles +foreach profile $encProfiles { + set i 0 + foreach {enc string bytes} $encValidStrings { + testconvert 4.4.15.$enc.$profile.[incr i] [list encoding convertto $enc $string] $bytes + if {"utf-16$endian" eq $enc} { + # utf-16le ->utf-16, utf-32be -> utf32 etc. + set enc [string range $enc 0 5] + testconvert 4.4.15.$enc.$profile.[incr i] [list encoding convertto $enc $string] $bytes + } + } +} -test cmdAH-4.25 {convertfrom -strict} -constraints knownBug -body { - encoding convertfrom -strict utf-8 A\x80B -} -returnCodes error -result {unexpected byte sequence starting at index 1: '\x80'} +# Cycle through the various combinations of encodings and profiles +# for invalid byte sequences +foreach {enc string profile bytes failidx tag} $encUnencodableStrings { + if {$failidx eq -1} { + set result [list $bytes] + } else { + # TODO - if the bad char is unprintable, tcltest errors out when printing a mismatch + # so glob it out for now. + set result [list "unexpected character at index $failidx: *" -returnCodes error -match glob] + } + if {$profile eq "default"} { + testconvert 4.4.15.$enc.$profile.$tag [list encoding convertto $enc $string] {*}$result + if {"utf-16$endian" eq $enc} { + # utf-16le ->utf-16, utf-32be -> utf32 etc. + set enc [string range $enc 0 5] + testconvert 4.3.15.$enc.$profile.$tag [list encoding convertto $enc $string] {*}$result + } + } else { + testconvert 4.4.15.$enc.$profile.$tag [list encoding convertto -profile $profile $enc $string] {*}$result + if {"utf-16$endian" eq $enc} { + # utf-16le ->utf-16, utf-32be -> utf32 etc. + set enc [string range $enc 0 5] + testconvert 4.4.15.$enc.$profile.$tag [list encoding convertto -profile $profile $enc $string] {*}$result + } + } +} -test cmdAH-4.26 {convertto -strict} -constraints {testbytestring knownBug} -body { - encoding convertto -strict utf-8 A[testbytestring \x80]B +test cmdAH-4.5.1 {convertto -profile strict} -constraints {testbytestring knownBug} -body { + # TODO - what does testbytestring even test? Invalid UTF8 in the Tcl_Obj bytes field + encoding convertto -profile strict utf-8 A[testbytestring \x80]B } -returnCodes error -result {unexpected byte sequence starting at index 1: '\x80'} +# +# encoding names 4.5.* +badnumargs cmdAH-4.5.1 {encoding names} {foo} +test cmdAH-4.5.2 {encoding names should include at least utf-8 and iso8859-1 and at least one more} -body { + set names [encoding names] + list [expr {"utf-8" in $names}] [expr {"iso8859-1" in $names}] [expr {[llength $names] > 2}] +} -result {1 1 1} + +# +# file command + test cmdAH-5.1 {Tcl_FileObjCmd} -returnCodes error -body { file } -result {wrong # args: should be "file subcommand ?arg ...?"} -- cgit v0.12 From b741dab392a7e58c23568bd821d7eff982c2ec14 Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Tue, 7 Feb 2023 11:25:22 +0000 Subject: Fix tcltest to not exit on encoding errors when printing to stdout --- library/tcltest/tcltest.tcl | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/library/tcltest/tcltest.tcl b/library/tcltest/tcltest.tcl index 7344f9f..94010a7 100644 --- a/library/tcltest/tcltest.tcl +++ b/library/tcltest/tcltest.tcl @@ -2221,7 +2221,11 @@ proc tcltest::test {name description args} { if {$scriptCompare} { puts [outputChannel] "---- Error testing result: $scriptMatch" } else { - puts [outputChannel] "---- Result was:\n$actualAnswer" + try { + puts [outputChannel] "---- Result was:\n$actualAnswer" + } on error {errMsg errCode} { + puts [outputChannel] "---- Result was:\n" + } puts [outputChannel] "---- Result should have been\ ($match matching):\n$result" } -- cgit v0.12 From 4294befd8b12d341c6fa74ef24120838d931a07a Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Thu, 9 Feb 2023 07:27:43 +0000 Subject: Do not have -failindex imply -strict --- generic/tclCmdAH.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/generic/tclCmdAH.c b/generic/tclCmdAH.c index 02a3a46..efc156c 100644 --- a/generic/tclCmdAH.c +++ b/generic/tclCmdAH.c @@ -554,7 +554,6 @@ EncodingConvertParseOptions ( Tcl_Interp *interp, /* For error messages. May be NULL */ int objc, /* Number of arguments */ Tcl_Obj *const objv[], /* Argument objects as passed to command. */ - int isEncoder, /* 1 -> convertto, 0 -> convertfrom */ Tcl_Encoding *encPtr, /* Where to store the encoding */ Tcl_Obj **dataObjPtr, /* Where to store ptr to Tcl_Obj containing data */ int *flagsPtr, /* Bit mask of encoding option flags */ @@ -640,15 +639,6 @@ numArgsError: /* ONLY jump here if nothing needs to be freed!!! */ dataObj = objv[objc - 1]; } - /* -failindex forces checking*/ - if (failVarObj != NULL && flags == TCL_ENCODING_NOCOMPLAIN) { - /* - * Historical, but I really don't like this mixing of defines - * from two different bit mask domains - ENCODING_FAILINDEX - */ - flags = isEncoder ? TCL_ENCODING_STOPONERROR : ENCODING_FAILINDEX; - } - *encPtr = encoding; *dataObjPtr = dataObj; *flagsPtr = flags; @@ -688,7 +678,7 @@ EncodingConvertfromObjCmd( Tcl_Obj *failVarObj; if (EncodingConvertParseOptions( - interp, objc, objv, 0, &encoding, &data, &flags, &failVarObj) + interp, objc, objv, &encoding, &data, &flags, &failVarObj) != TCL_OK) { return TCL_ERROR; } @@ -775,7 +765,7 @@ EncodingConverttoObjCmd( Tcl_Obj *failVarObj; if (EncodingConvertParseOptions( - interp, objc, objv, 1, &encoding, &data, &flags, &failVarObj) + interp, objc, objv, &encoding, &data, &flags, &failVarObj) != TCL_OK) { return TCL_ERROR; } -- cgit v0.12 From b185a55c3b335a847e148680c628136c7c16640f Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Thu, 9 Feb 2023 07:55:29 +0000 Subject: Add 4 testcases, showing that the bug fix introduces another (minor) problem. To be fixed soon --- tests/encoding.test | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tests/encoding.test b/tests/encoding.test index 05d9918..e42c3b9 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -485,6 +485,18 @@ test encoding-16.8 {Utf32ToUtfProc} -body { set val [encoding convertfrom -nocomplain utf-32 \x41\x00\x00\x41] list $val [format %x [scan $val %c]] } -result "\uFFFD fffd" +test encoding-16.9 {Utf32ToUtfProc} -body { + encoding convertfrom utf-32le \x00\xD8\x00\x00 +} -result \uD800 +test encoding-16.10 {Utf32ToUtfProc} -body { + encoding convertfrom utf-32le \x00\xDC\x00\x00 +} -result \uDC00 +test encoding-16.11 {Utf32ToUtfProc} -body { + encoding convertfrom utf-32le \x00\xD8\x00\x00\x00\xDC\x00\x00 +} -result \uD800\uDC00 +test encoding-16.12 {Utf32ToUtfProc} -body { + encoding convertfrom utf-32le \x00\xDC\x00\x00\x00\xD8\x00\x00 +} -result \uDC00\uD800 test encoding-16.9 { Utf16ToUtfProc, Tcl_UniCharToUtf, surrogate pairs in utf-16 -- cgit v0.12 From d46a2441593da26b460fba5a4612ec43fa0d9215 Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Thu, 9 Feb 2023 17:03:31 +0000 Subject: Add equivalent tests from ff630bf370 --- tests/cmdAH.test | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/tests/cmdAH.test b/tests/cmdAH.test index ad5e540..c4053a2 100644 --- a/tests/cmdAH.test +++ b/tests/cmdAH.test @@ -229,9 +229,21 @@ set encInvalidBytes { utf-8 A\xed\xb0\x80B tcl8 A\uDC00B -1 Low-surrogate utf-8 A\xed\xb0\x80B strict A 1 Low-surrogate - utf-32le \x41\x00\x00\x00\x00\xD8\x00\x00\x42\x00\x00\x00 default A\uD800B -1 {High-surrogate} - utf-32le \x41\x00\x00\x00\x00\xD8\x00\x00\x42\x00\x00\x00 tcl8 A\uD800B -1 {High-surrogate} - utf-32le \x41\x00\x00\x00\x00\xD8\x00\x00\x42\x00\x00\x00 strict A 4 {High-surrogate} + utf-32le \x00\xD8\x00\x00 default \uD800 -1 {High-surrogate} + utf-32le \x00\xD8\x00\x00 tcl8 \uD800 -1 {High-surrogate} + utf-32le \x00\xD8\x00\x00 strict "" 0 {High-surrogate} + utf-32le \x00\xDC\x00\x00 default \uDC00 -1 {Low-surrogate} + utf-32le \x00\xDC\x00\x00 tcl8 \uDC00 -1 {Low-surrogate} + utf-32le \x00\xDC\x00\x00 strict "" 0 {Low-surrogate} + utf-32le \x00\xD8\x00\x00\x00\xDC\x00\x00 default \uD800\uDC00 -1 {High-low-surrogate} + utf-32le \x00\xD8\x00\x00\x00\xDC\x00\x00 tcl8 \uD800\uDC00 -1 {High-low-surrogate} + utf-32le \x00\xD8\x00\x00\x00\xDC\x00\x00 strict "" 0 {High-low-surrogate} + utf-32le \x00\xDC\x00\x00\x00\xD8\x00\x00 default \uDC00\uD800 -1 {High-low-surrogate} + utf-32le \x00\xDC\x00\x00\x00\xD8\x00\x00 tcl8 \uDC00\uD800 -1 {High-low-surrogate} + utf-32le \x00\xDC\x00\x00\x00\xD8\x00\x00 strict "" 0 {High-low-surrogate} + utf-32le \x41\x00\x00\x00\x00\xD8\x00\x00\x42\x00\x00\x00 default A\uD800B -1 {High-surrogate-middle} + utf-32le \x41\x00\x00\x00\x00\xD8\x00\x00\x42\x00\x00\x00 tcl8 A\uD800B -1 {High-surrogate-middle} + utf-32le \x41\x00\x00\x00\x00\xD8\x00\x00\x42\x00\x00\x00 strict A 4 {High-surrogate-middle} } # Strings that cannot be encoded for specific encoding / profiles -- cgit v0.12 From 9d1ba01f11c772a015e3edbfb1ea4ae8e9f148bf Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Thu, 9 Feb 2023 17:04:33 +0000 Subject: Modify encoding C API to use profiles (in progress) --- generic/tcl.h | 22 +++++++++- generic/tclCmdAH.c | 16 ++----- generic/tclEncoding.c | 118 ++++++++++++++++++++++++++++++++++++++------------ generic/tclIO.c | 6 ++- generic/tclInt.h | 13 +----- 5 files changed, 122 insertions(+), 53 deletions(-) diff --git a/generic/tcl.h b/generic/tcl.h index f373382..ec94e71 100644 --- a/generic/tcl.h +++ b/generic/tcl.h @@ -2144,7 +2144,27 @@ typedef struct Tcl_EncodingType { #define TCL_ENCODING_CHAR_LIMIT 0x10 #define TCL_ENCODING_MODIFIED 0x20 #define TCL_ENCODING_NOCOMPLAIN 0x40 -#define TCL_ENCODING_STRICT 0x44 +#define TCL_ENCODING_STRICT 0x44 +/* Reserve top byte for profile values (disjoint) */ +#define TCL_ENCODING_PROFILE_TCL8 0x01000000 +#define TCL_ENCODING_PROFILE_STRICT 0x02000000 +#define TCL_ENCODING_PROFILE_MASK 0xFF000000 +#define TCL_ENCODING_PROFILE_GET(flags_) ((flags_) & TCL_ENCODING_PROFILE_MASK) +#define TCL_ENCODING_PROFILE_SET(flags_, profile_) \ + do { \ + (flags_) &= ~TCL_ENCODING_PROFILE_MASK; \ + (flags_) |= profile_; \ + } while (0) +/* Still being argued - For Tcl9, is the default strict? TODO */ +#if TCL_MAJOR_VERSION < 9 +#define TCL_ENCODING_PROFILE_DEFAULT TCL_ENCODING_PROFILE_TCL8 +#else +#define TCL_ENCODING_PROFILE_DEFAULT TCL_ENCODING_PROFILE_TCL8 /* STRICT? TODO */ +#endif + +#define TCL_ENCODING_EXTERNAL_FLAG_MASK \ + (TCL_ENCODING_START|TCL_ENCODING_END|TCL_ENCODING_STOPONERROR) + /* * The following definitions are the error codes returned by the conversion diff --git a/generic/tclCmdAH.c b/generic/tclCmdAH.c index efc156c..05c0887 100644 --- a/generic/tclCmdAH.c +++ b/generic/tclCmdAH.c @@ -562,7 +562,7 @@ EncodingConvertParseOptions ( { static const char *const options[] = {"-profile", "-failindex", NULL}; enum convertfromOptions { PROFILE, FAILINDEX } optIndex; - enum TclEncodingProfile profile; + int profile; Tcl_Encoding encoding; Tcl_Obj *dataObj; Tcl_Obj *failVarObj; @@ -614,17 +614,9 @@ numArgsError: /* ONLY jump here if nothing needs to be freed!!! */ != TCL_OK) { return TCL_ERROR; } - switch (profile) { - case TCL_ENCODING_PROFILE_TCL8: - flags = TCL_ENCODING_NOCOMPLAIN; - break; - case TCL_ENCODING_PROFILE_STRICT: - flags = TCL_ENCODING_STRICT; - break; - case TCL_ENCODING_PROFILE_DEFAULT: /* FALLTHRU */ - default: - break; - } + /* TODO - next line probably not needed as the conversion + functions already take care of mapping profile to flags */ + flags = TclEncodingExternalFlagsToInternal(profile); break; case FAILINDEX: failVarObj = objv[argIndex]; diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 106a2f1..8e42e26 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -542,6 +542,8 @@ TclInitEncodingSubsystem(void) Tcl_InitHashTable(&encodingTable, TCL_STRING_KEYS); Tcl_MutexUnlock(&encodingMutex); + /* TODO - why is NOCOMPLAIN being hardcoded for encodings below? */ + /* * Create a few initial encodings. UTF-8 to UTF-8 translation is not a * no-op because it turns a stream of improperly formed UTF-8 into a @@ -1184,13 +1186,12 @@ Tcl_ExternalToUtfDString( * The parameter flags controls the behavior, if any of the bytes in * the source buffer are invalid or cannot be represented in utf-8. * Possible flags values: - * TCL_ENCODING_STOPONERROR: don't replace invalid characters/bytes but - * return the first error position (Default in Tcl 9.0). - * TCL_ENCODING_NOCOMPLAIN: replace invalid characters/bytes by a default - * fallback character. Always return -1 (Default in Tcl 8.7). - * TCL_ENCODING_MODIFIED: convert NULL bytes to \xC0\x80 in stead of 0x00. - * Only valid for "utf-8" and "cesu-8". This flag may be used together - * with the other flags. + * target encoding. It should be composed by OR-ing the following: + * - *At most one* of TCL_ENCODING_PROFILE{DEFAULT,TCL8,STRICT} + * - TCL_ENCODING_STOPONERROR: Backward compatibility. Sets the profile + * to TCL_ENCODING_PROFILE_STRICT overriding any specified profile flags + * - TCL_ENCODING_MODIFIED: enable Tcl internal conversion mapping \xC0\x80 + * to 0x00. Only valid for "utf-8" and "cesu-8". * * Results: * The converted bytes are stored in the DString, which is then NULL @@ -1236,6 +1237,7 @@ Tcl_ExternalToUtfDStringEx( srcLen = encodingPtr->lengthProc(src); } + flags = TclEncodingExternalFlagsToInternal(flags); flags |= TCL_ENCODING_START | TCL_ENCODING_END; if (encodingPtr->toUtfProc == UtfToUtfProc) { flags |= TCL_ENCODING_MODIFIED | TCL_ENCODING_UTF; @@ -1408,7 +1410,7 @@ Tcl_UtfToExternalDString( Tcl_DString *dstPtr) /* Uninitialized or free DString in which the * converted string is stored. */ { - Tcl_UtfToExternalDStringEx(encoding, src, srcLen, TCL_ENCODING_NOCOMPLAIN, dstPtr); + Tcl_UtfToExternalDStringEx(encoding, src, srcLen, TCL_ENCODING_PROFILE_DEFAULT, dstPtr); return Tcl_DStringValue(dstPtr); } @@ -1421,15 +1423,12 @@ Tcl_UtfToExternalDString( * Convert a source buffer from UTF-8 to the specified encoding. * The parameter flags controls the behavior, if any of the bytes in * the source buffer are invalid or cannot be represented in the - * target encoding. - * Possible flags values: - * TCL_ENCODING_STOPONERROR: don't replace invalid characters/bytes but - * return the first error position (Default in Tcl 9.0). - * TCL_ENCODING_NOCOMPLAIN: replace invalid characters/bytes by a default - * fallback character. Always return -1 (Default in Tcl 8.7). - * TCL_ENCODING_MODIFIED: convert NULL bytes to \xC0\x80 in stead of 0x00. - * Only valid for "utf-8" and "cesu-8". This flag may be used together - * with the other flags. + * target encoding. It should be composed by OR-ing the following: + * - *At most one* of TCL_ENCODING_PROFILE{DEFAULT,TCL8,STRICT} + * - TCL_ENCODING_STOPONERROR: Backward compatibility. Sets the profile + * to TCL_ENCODING_PROFILE_STRICT overriding any specified profile flags + * - TCL_ENCODING_MODIFIED: convert NULL bytes to \xC0\x80 instead + * of 0x00. Only valid for "utf-8" and "cesu-8". * * Results: * The converted bytes are stored in the DString, which is then NULL @@ -1450,7 +1449,7 @@ Tcl_UtfToExternalDStringEx( const char *src, /* Source string in UTF-8. */ int srcLen, /* Source string length in bytes, or < 0 for * strlen(). */ - int flags, /* Conversion control flags. */ + int flags, /* Conversion control flags. */ Tcl_DString *dstPtr) /* Uninitialized or free DString in which the * converted string is stored. */ { @@ -1474,6 +1473,7 @@ Tcl_UtfToExternalDStringEx( } else if (srcLen < 0) { srcLen = strlen(src); } + flags = TclEncodingExternalFlagsToInternal(flags); flags |= TCL_ENCODING_START | TCL_ENCODING_END; while (1) { result = encodingPtr->fromUtfProc(encodingPtr->clientData, src, @@ -4095,7 +4095,7 @@ InitializeEncodingSearchPath( * * TclEncodingProfileParseName -- * - * Maps an encoding profile name to its enum value. + * Maps an encoding profile name to its integer equivalent. * * Results: * TCL_OK on success or TCL_ERROR on failure. @@ -4107,17 +4107,22 @@ InitializeEncodingSearchPath( */ int TclEncodingProfileParseName( - Tcl_Interp *interp, /* For error messages. May be NULL */ - const char *profileName, /* Name of profile */ - enum TclEncodingProfile *profilePtr) /* Output */ + Tcl_Interp *interp, /* For error messages. May be NULL */ + const char *profileName, /* Name of profile */ + int *profilePtr) /* Output */ { - /* NOTE: Order must match enum TclEncodingProfile !!! */ - static const char *const profileNames[] = {"", "tcl8", "strict"}; - int idx; + /* NOTE: Order in arrays must match !!! */ + static const char *const profileNames[] = {"", "tcl8", "strict", NULL}; + static int profileFlags[] = { + TCL_ENCODING_PROFILE_DEFAULT, + TCL_ENCODING_PROFILE_TCL8, + TCL_ENCODING_PROFILE_STRICT, + }; + int i; - for (idx = 0; idx < sizeof(profileNames) / sizeof(profileNames[0]); ++idx) { - if (!strcmp(profileName, profileNames[idx])) { - *profilePtr = (enum TclEncodingProfile)idx; + for (i = 0; i < sizeof(profileNames) / sizeof(profileNames[0]); ++i) { + if (!strcmp(profileName, profileNames[i])) { + *profilePtr = profileFlags[i]; return TCL_OK; } } @@ -4134,6 +4139,63 @@ TclEncodingProfileParseName( } /* + *------------------------------------------------------------------------ + * + * TclEncodingExternalFlagsToInternal -- + * + * Maps the flags supported in the encoding C API's to internal flags. + * + * TCL_ENCODING_STRICT and TCL_ENCODING_NOCOMPLAIN are masked off + * because they are for internal use only and externally specified + * through TCL_ENCODING_PROFILE_* bits. + * + * For backward compatibility reasons, TCL_ENCODING_STOPONERROR is + * is mapped to the TCL_ENCODING_PROFILE_STRICT overwriting any profile + * specified. + * + * If no profile or an invalid profile is specified, it is set to + * the default. + * + * Results: + * Internal encoding flag mask. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------ + */ +int TclEncodingExternalFlagsToInternal(int flags) +{ + flags &= ~(TCL_ENCODING_STRICT | TCL_ENCODING_NOCOMPLAIN); + if (flags & TCL_ENCODING_STOPONERROR) { + TCL_ENCODING_PROFILE_SET(flags, TCL_ENCODING_PROFILE_STRICT); + } + else { + int profile = TCL_ENCODING_PROFILE_GET(flags); + switch (profile) { + case TCL_ENCODING_PROFILE_TCL8: + flags |= TCL_ENCODING_NOCOMPLAIN; + break; + case TCL_ENCODING_PROFILE_STRICT: + flags |= TCL_ENCODING_STRICT; + break; + default: + /* TODO - clean this up once default mechanisms settled */ + TCL_ENCODING_PROFILE_SET(flags, TCL_ENCODING_PROFILE_DEFAULT); +#if TCL_ENCODING_PROFILE_DEFAULT == TCL_ENCODING_PROFILE_TCL8 + flags |= TCL_ENCODING_NOCOMPLAIN; +#elif TCL_ENCODING_PROFILE_DEFAULT == TCL_ENCODING_PROFILE_STRICT + flags |= TCL_ENCODING_STRICT; +#else +#error TCL_ENCODING_PROFILE_DEFAULT must be TCL8 or STRICT +#endif + break; + } + } + return flags; +} + +/* * Local Variables: * mode: c * c-basic-offset: 4 diff --git a/generic/tclIO.c b/generic/tclIO.c index 370ca95..0152740 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -8379,7 +8379,7 @@ Tcl_SetChannelOption( statePtr->inputEncodingFlags &= ~TCL_ENCODING_END; return TCL_OK; } else if (HaveOpt(1, "-encodingprofile")) { - enum TclEncodingProfile profile; + int profile; if (TclEncodingProfileParseName(interp, newValue, &profile) != TCL_OK) { return TCL_ERROR; } @@ -8392,7 +8392,11 @@ Tcl_SetChannelOption( ResetFlag(statePtr, CHANNEL_ENCODING_NOCOMPLAIN); SetFlag(statePtr, CHANNEL_ENCODING_STRICT); break; + /* TODO - clean up this DEFAULT handling once channel flags fixed */ +#if TCL_ENCODING_PROFILE_DEFAULT != TCL_ENCODING_PROFILE_TCL8 \ + && TCL_ENCODING_PROFILE_DEFAULT != TCL_ENCODING_PROFILE_STRICT case TCL_ENCODING_PROFILE_DEFAULT: /* FALLTHRU */ +#endif default: ResetFlag(statePtr, CHANNEL_ENCODING_NOCOMPLAIN); ResetFlag(statePtr, CHANNEL_ENCODING_STRICT); diff --git a/generic/tclInt.h b/generic/tclInt.h index 82728d3..2b491d6 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -2883,21 +2883,12 @@ MODULE_SCOPE TclPlatformType tclPlatform; * Declarations related to internal encoding functions. */ -/* - * Enum for encoding profiles that control encoding treatment of - * invalid bytes. NOTE: Order must match that of encodingProfileNames in - * TclEncodingProfileParseName() !!! - */ -enum TclEncodingProfile { - TCL_ENCODING_PROFILE_DEFAULT, - TCL_ENCODING_PROFILE_TCL8, - TCL_ENCODING_PROFILE_STRICT, -}; MODULE_SCOPE Tcl_Encoding tclIdentityEncoding; MODULE_SCOPE int TclEncodingProfileParseName(Tcl_Interp *interp, const char *profileName, - enum TclEncodingProfile *profilePtr); + int *profilePtr); +MODULE_SCOPE int TclEncodingExternalFlagsToInternal(int flags); /* * TIP #233 (Virtualized Time) -- cgit v0.12 From fd83fb931e43901b77f4e480ef63841e10b39f22 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Thu, 9 Feb 2023 19:52:00 +0000 Subject: Add 4 more testcases, showing that the same bug is present in utf-16 as well. Also fix the bug (really, now!) --- generic/tclEncoding.c | 44 ++++++++++++++++++++++++++++++++++++-------- tests/encoding.test | 12 ++++++++++++ 2 files changed, 48 insertions(+), 8 deletions(-) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index d19e237..0941f14 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -2531,7 +2531,7 @@ Utf32ToUtfProc( const char *srcStart, *srcEnd; const char *dstEnd, *dstStart; int result, numChars, charLimit = INT_MAX; - int ch; + int ch = 0; flags |= PTR2INT(clientData); if (flags & TCL_ENCODING_CHAR_LIMIT) { @@ -2548,6 +2548,19 @@ Utf32ToUtfProc( srcLen &= -4; } + /* + * If last code point is a high surrogate, we cannot handle that yet, + * unless we are at the end. + */ + + if (!(flags & TCL_ENCODING_END) && (srcLen >= 4) && + ((src[srcLen - ((flags & TCL_ENCODING_LE)?3:2)] & 0xFC) == 0xD8) && + ((src[srcLen - ((flags & TCL_ENCODING_LE)?2:3)]) == 0) && + ((src[srcLen - ((flags & TCL_ENCODING_LE)?1:4)]) == 0)) { + result = TCL_CONVERT_MULTIBYTE; + srcLen-= 4; + } + srcStart = src; srcEnd = src + srcLen; @@ -2560,11 +2573,16 @@ Utf32ToUtfProc( break; } + int prev = ch; if (flags & TCL_ENCODING_LE) { ch = (src[3] & 0xFF) << 24 | (src[2] & 0xFF) << 16 | (src[1] & 0xFF) << 8 | (src[0] & 0xFF); } else { ch = (src[0] & 0xFF) << 24 | (src[1] & 0xFF) << 16 | (src[2] & 0xFF) << 8 | (src[3] & 0xFF); } + if (((prev & ~0x3FF) == 0xD800) && ((ch & ~0x3FF) != 0xDC00)) { + /* Bug [10c2c17c32]. If Hi surrogate not followed by Lo surrogate, finish 3-byte UTF-8 */ + dst += Tcl_UniCharToUtf(-1, dst); + } if ((unsigned)ch > 0x10FFFF || (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT) && ((ch & ~0x7FF) == 0xD800))) { if (STOPONERROR) { @@ -2582,14 +2600,14 @@ Utf32ToUtfProc( *dst++ = (ch & 0xFF); } else { dst += Tcl_UniCharToUtf(ch, dst); - if ((ch & ~0x3FF) == 0xD800) { - /* Bug [10c2c17c32]. If Hi surrogate, finish 3-byte UTF-8 */ - dst += Tcl_UniCharToUtf(-1, dst); - } } src += sizeof(unsigned int); } + if ((ch & ~0x3FF) == 0xD800) { + /* Bug [10c2c17c32]. If Hi surrogate, finish 3-byte UTF-8 */ + dst += Tcl_UniCharToUtf(-1, dst); + } *srcReadPtr = src - srcStart; *dstWrotePtr = dst - dstStart; *dstCharsPtr = numChars; @@ -2734,7 +2752,7 @@ Utf16ToUtfProc( const char *srcStart, *srcEnd; const char *dstEnd, *dstStart; int result, numChars, charLimit = INT_MAX; - unsigned short ch; + unsigned short ch = 0; flags |= PTR2INT(clientData); if (flags & TCL_ENCODING_CHAR_LIMIT) { @@ -2752,10 +2770,11 @@ Utf16ToUtfProc( } /* - * If last code point is a high surrogate, we cannot handle that yet. + * If last code point is a high surrogate, we cannot handle that yet, + * unless we are at the end. */ - if ((srcLen >= 2) && + if (!(flags & TCL_ENCODING_END) && (srcLen >= 2) && ((src[srcLen - ((flags & TCL_ENCODING_LE)?1:2)] & 0xFC) == 0xD8)) { result = TCL_CONVERT_MULTIBYTE; srcLen-= 2; @@ -2773,11 +2792,16 @@ Utf16ToUtfProc( break; } + unsigned short prev = ch; if (flags & TCL_ENCODING_LE) { ch = (src[1] & 0xFF) << 8 | (src[0] & 0xFF); } else { ch = (src[0] & 0xFF) << 8 | (src[1] & 0xFF); } + if (((prev & ~0x3FF) == 0xD800) && ((ch & ~0x3FF) != 0xDC00)) { + /* Bug [10c2c17c32]. If Hi surrogate not followed by Lo surrogate, finish 3-byte UTF-8 */ + dst += Tcl_UniCharToUtf(-1, dst); + } /* * Special case for 1-byte utf chars for speed. Make sure we work with @@ -2792,6 +2816,10 @@ Utf16ToUtfProc( src += sizeof(unsigned short); } + if ((ch & ~0x3FF) == 0xD800) { + /* Bug [10c2c17c32]. If Hi surrogate, finish 3-byte UTF-8 */ + dst += Tcl_UniCharToUtf(-1, dst); + } *srcReadPtr = src - srcStart; *dstWrotePtr = dst - dstStart; *dstCharsPtr = numChars; diff --git a/tests/encoding.test b/tests/encoding.test index e42c3b9..b2b029e 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -497,6 +497,18 @@ test encoding-16.11 {Utf32ToUtfProc} -body { test encoding-16.12 {Utf32ToUtfProc} -body { encoding convertfrom utf-32le \x00\xDC\x00\x00\x00\xD8\x00\x00 } -result \uDC00\uD800 +test encoding-16.13 {Utf16ToUtfProc} -body { + encoding convertfrom utf-16le \x00\xD8 +} -result \uD800 +test encoding-16.14 {Utf16ToUtfProc} -body { + encoding convertfrom utf-16le \x00\xDC +} -result \uDC00 +test encoding-16.15 {Utf16ToUtfProc} -body { + encoding convertfrom utf-16le \x00\xD8\x00\xDC +} -result \uD800\uDC00 +test encoding-16.16 {Utf16ToUtfProc} -body { + encoding convertfrom utf-16le \x00\xDC\x00\xD8 +} -result \uDC00\uD800 test encoding-16.9 { Utf16ToUtfProc, Tcl_UniCharToUtf, surrogate pairs in utf-16 -- cgit v0.12 From e26214c28753b22c398ba4d7196a8afae999ab5a Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Fri, 10 Feb 2023 17:07:12 +0000 Subject: Phase out (almost) STRICT and NOCOMPLAIN flags. --- generic/tclCmdAH.c | 38 +++++++++------- generic/tclEncoding.c | 114 +++++++++++++++++++++++++++++++++++------------- generic/tclIO.c | 118 ++++++++++++-------------------------------------- generic/tclIO.h | 3 +- generic/tclInt.h | 8 ++-- 5 files changed, 140 insertions(+), 141 deletions(-) diff --git a/generic/tclCmdAH.c b/generic/tclCmdAH.c index 05c0887..5fbe27e 100644 --- a/generic/tclCmdAH.c +++ b/generic/tclCmdAH.c @@ -543,7 +543,7 @@ TclInitEncodingCmd( * if non-NULL * - *dataObjPtr is set to the Tcl_Obj containing the data to encode or * decode - * - *flagsPtr is set to encoding error handling flags + * - *profilePtr is set to encoding error handling profile * - *failVarPtr is set to -failindex option value or NULL * On error, all of the above are uninitialized. * @@ -556,20 +556,19 @@ EncodingConvertParseOptions ( Tcl_Obj *const objv[], /* Argument objects as passed to command. */ Tcl_Encoding *encPtr, /* Where to store the encoding */ Tcl_Obj **dataObjPtr, /* Where to store ptr to Tcl_Obj containing data */ - int *flagsPtr, /* Bit mask of encoding option flags */ + int *profilePtr, /* Bit mask of encoding option profile */ Tcl_Obj **failVarPtr /* Where to store -failindex option value */ ) { static const char *const options[] = {"-profile", "-failindex", NULL}; enum convertfromOptions { PROFILE, FAILINDEX } optIndex; - int profile; Tcl_Encoding encoding; Tcl_Obj *dataObj; Tcl_Obj *failVarObj; #if TCL_MAJOR_VERSION > 8 || defined(TCL_NO_DEPRECATED) - int flags = TCL_ENCODING_STOPONERROR; + int profile = TCL_ENCODING_PROFILE_TCL8; /* TODO - default for Tcl9? */ #else - int flags = TCL_ENCODING_NOCOMPLAIN; + int profile = TCL_ENCODING_PROFILE_TCL8; #endif /* @@ -609,14 +608,16 @@ numArgsError: /* ONLY jump here if nothing needs to be freed!!! */ } switch (optIndex) { case PROFILE: - if (TclEncodingProfileParseName( + if (TclEncodingProfileNameToId( interp, Tcl_GetString(objv[argIndex]), &profile) != TCL_OK) { return TCL_ERROR; } +#ifdef NOTNEEDED /* TODO - next line probably not needed as the conversion functions already take care of mapping profile to flags */ - flags = TclEncodingExternalFlagsToInternal(profile); + profile = TclEncodingExternalFlagsToInternal(profile); +#endif break; case FAILINDEX: failVarObj = objv[argIndex]; @@ -633,7 +634,7 @@ numArgsError: /* ONLY jump here if nothing needs to be freed!!! */ *encPtr = encoding; *dataObjPtr = dataObj; - *flagsPtr = flags; + *profilePtr = profile; *failVarPtr = failVarObj; return TCL_OK; @@ -676,20 +677,23 @@ EncodingConvertfromObjCmd( } /* - * Convert the string into a byte array in 'ds' + * Convert the string into a byte array in 'ds'. */ #if !defined(TCL_NO_DEPRECATED) && (TCL_MAJOR_VERSION < 9) - if (!(flags & TCL_ENCODING_STOPONERROR)) { + if (TCL_ENCODING_PROFILE_GET(flags) == TCL_ENCODING_PROFILE_TCL8) { + /* Permits high bits to be non-0 in byte array (Tcl 8 style) */ bytesPtr = (char *) Tcl_GetByteArrayFromObj(data, &length); - } else + } + else #endif - bytesPtr = (char *) TclGetBytesFromObj(interp, data, &length); + bytesPtr = (char *) TclGetBytesFromObj(interp, data, &length); + if (bytesPtr == NULL) { return TCL_ERROR; } result = Tcl_ExternalToUtfDStringEx(encoding, bytesPtr, length, flags, &ds); - if ((!(flags & TCL_ENCODING_NOCOMPLAIN) || ((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT)) && (result != TCL_INDEX_NONE)) { + if (result != TCL_INDEX_NONE) { if (failVarObj != NULL) { if (Tcl_ObjSetVar2(interp, failVarObj, NULL, Tcl_NewWideIntObj(result), TCL_LEAVE_ERR_MSG) == NULL) { return TCL_ERROR; @@ -704,7 +708,8 @@ EncodingConvertfromObjCmd( Tcl_DStringFree(&ds); return TCL_ERROR; } - } else if (failVarObj != NULL) { + } + else if (failVarObj != NULL) { if (Tcl_ObjSetVar2(interp, failVarObj, NULL, Tcl_NewIntObj(-1), TCL_LEAVE_ERR_MSG) == NULL) { return TCL_ERROR; } @@ -769,7 +774,7 @@ EncodingConverttoObjCmd( stringPtr = TclGetStringFromObj(data, &length); result = Tcl_UtfToExternalDStringEx(encoding, stringPtr, length, flags, &ds); - if ((!(flags & TCL_ENCODING_NOCOMPLAIN) || ((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT)) && (result != TCL_INDEX_NONE)) { + if (result != TCL_INDEX_NONE) { if (failVarObj != NULL) { /* I hope, wide int will cover size_t data type */ if (Tcl_ObjSetVar2(interp, failVarObj, NULL, Tcl_NewWideIntObj(result), TCL_LEAVE_ERR_MSG) == NULL) { @@ -788,7 +793,8 @@ EncodingConverttoObjCmd( Tcl_DStringFree(&ds); return TCL_ERROR; } - } else if (failVarObj != NULL) { + } + else if (failVarObj != NULL) { if (Tcl_ObjSetVar2(interp, failVarObj, NULL, Tcl_NewIntObj(-1), TCL_LEAVE_ERR_MSG) == NULL) { return TCL_ERROR; } diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 8e42e26..153f8d3 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -188,6 +188,15 @@ static Tcl_Encoding systemEncoding = NULL; Tcl_Encoding tclIdentityEncoding = NULL; /* + * Names of encoding profiles and corresponding integer values + */ +static struct TclEncodingProfiles { + const char *name; + int value; +} encodingProfiles[] = {{"tcl8", TCL_ENCODING_PROFILE_TCL8}, + {"strict", TCL_ENCODING_PROFILE_STRICT}}; + +/* * The following variable is used in the sparse matrix code for a * TableEncoding to represent a page in the table that has no entries. */ @@ -1172,7 +1181,7 @@ Tcl_ExternalToUtfDString( Tcl_DString *dstPtr) /* Uninitialized or free DString in which the * converted string is stored. */ { - Tcl_ExternalToUtfDStringEx(encoding, src, srcLen, TCL_ENCODING_NOCOMPLAIN, dstPtr); + Tcl_ExternalToUtfDStringEx(encoding, src, srcLen, TCL_ENCODING_PROFILE_TCL8, dstPtr); return Tcl_DStringValue(dstPtr); } @@ -2315,11 +2324,17 @@ BinaryProc( *------------------------------------------------------------------------- */ +#ifdef OBSOLETE #if TCL_MAJOR_VERSION > 8 || defined(TCL_NO_DEPRECATED) # define STOPONERROR (!(flags & TCL_ENCODING_NOCOMPLAIN) || (flags & TCL_ENCODING_STOPONERROR)) #else # define STOPONERROR (flags & TCL_ENCODING_STOPONERROR) #endif +#endif + + +#define STRICT_PROFILE(flags_) (TCL_ENCODING_PROFILE_GET(flags_) == TCL_ENCODING_PROFILE_STRICT) +#define STOPONERROR STRICT_PROFILE(flags) static int UtfToUtfProc( @@ -2386,10 +2401,11 @@ UtfToUtfProc( */ *dst++ = *src++; - } else if ((UCHAR(*src) == 0xC0) && (src + 1 < srcEnd) - && (UCHAR(src[1]) == 0x80) && (!(flags & TCL_ENCODING_MODIFIED) - || ((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT) - || (flags & ENCODING_FAILINDEX))) { + } else if ((UCHAR(*src) == 0xC0) && + (src + 1 < srcEnd) && + (UCHAR(src[1]) == 0x80) && + (!(flags & TCL_ENCODING_MODIFIED) + || (STRICT_PROFILE(flags)))) { /* * If in input mode, and -strict or -failindex is specified: This is an error. */ @@ -2403,7 +2419,8 @@ UtfToUtfProc( */ *dst++ = 0; src += 2; - } else if (!Tcl_UtfCharComplete(src, srcEnd - src)) { + } + else if (!Tcl_UtfCharComplete(src, srcEnd - src)) { /* * Always check before using TclUtfToUCS4. Not doing can so * cause it run beyond the end of the buffer! If we happen such an @@ -2416,10 +2433,10 @@ UtfToUtfProc( result = TCL_CONVERT_MULTIBYTE; break; } - if (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT) || (flags & ENCODING_FAILINDEX)) { - result = TCL_CONVERT_SYNTAX; - break; - } + if (STRICT_PROFILE(flags)) { + result = TCL_CONVERT_SYNTAX; + break; + } ch = UCHAR(*src++); } else { char chbuf[2]; @@ -2427,12 +2444,13 @@ UtfToUtfProc( TclUtfToUCS4(chbuf, &ch); } dst += Tcl_UniCharToUtf(ch, dst); - } else { + } + else { int low; const char *saveSrc = src; size_t len = TclUtfToUCS4(src, &ch); if ((len < 2) && (ch != 0) && (flags & TCL_ENCODING_MODIFIED) - && (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT))) { + && STRICT_PROFILE(flags)) { result = TCL_CONVERT_SYNTAX; break; } @@ -2475,8 +2493,9 @@ UtfToUtfProc( result = TCL_CONVERT_UNKNOWN; src = saveSrc; break; - } else if (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT) - && (flags & TCL_ENCODING_MODIFIED) && ((ch & ~0x7FF) == 0xD800)) { + } else if (STRICT_PROFILE(flags) && + (flags & TCL_ENCODING_MODIFIED) && + ((ch & ~0x7FF) == 0xD800)) { result = TCL_CONVERT_SYNTAX; src = saveSrc; break; @@ -2567,8 +2586,8 @@ Utf32ToUtfProc( } else { ch = (src[0] & 0xFF) << 24 | (src[1] & 0xFF) << 16 | (src[2] & 0xFF) << 8 | (src[3] & 0xFF); } - if ((unsigned)ch > 0x10FFFF || (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT) - && ((ch & ~0x7FF) == 0xD800))) { + if ((unsigned)ch > 0x10FFFF + || (STRICT_PROFILE(flags) && ((ch & ~0x7FF) == 0xD800))) { if (STOPONERROR) { result = TCL_CONVERT_SYNTAX; break; @@ -4095,34 +4114,27 @@ InitializeEncodingSearchPath( * * TclEncodingProfileParseName -- * - * Maps an encoding profile name to its integer equivalent. + * Maps an encoding profile name to its integer equivalent. * * Results: - * TCL_OK on success or TCL_ERROR on failure. + * TCL_OK on success or TCL_ERROR on failure. * * Side effects: - * Returns the profile enum value in *profilePtr + * Returns the profile enum value in *profilePtr * *------------------------------------------------------------------------ */ int -TclEncodingProfileParseName( +TclEncodingProfileNameToId( Tcl_Interp *interp, /* For error messages. May be NULL */ const char *profileName, /* Name of profile */ int *profilePtr) /* Output */ { - /* NOTE: Order in arrays must match !!! */ - static const char *const profileNames[] = {"", "tcl8", "strict", NULL}; - static int profileFlags[] = { - TCL_ENCODING_PROFILE_DEFAULT, - TCL_ENCODING_PROFILE_TCL8, - TCL_ENCODING_PROFILE_STRICT, - }; int i; - for (i = 0; i < sizeof(profileNames) / sizeof(profileNames[0]); ++i) { - if (!strcmp(profileName, profileNames[i])) { - *profilePtr = profileFlags[i]; + for (i = 0; i < sizeof(encodingProfiles) / sizeof(encodingProfiles[0]); ++i) { + if (!strcmp(profileName, encodingProfiles[i].name)) { + *profilePtr = encodingProfiles[i].value; return TCL_OK; } } @@ -4130,13 +4142,52 @@ TclEncodingProfileParseName( Tcl_SetObjResult( interp, Tcl_ObjPrintf( - "bad profile \"%s\". Must be \"\", \"tcl8\" or \"strict\".", + "bad profile \"%s\". Must be \"tcl8\" or \"strict\".", profileName)); Tcl_SetErrorCode( interp, "TCL", "ENCODING", "PROFILE", profileName, NULL); } return TCL_ERROR; } + +/* + *------------------------------------------------------------------------ + * + * TclEncodingProfileValueToName -- + * + * Maps an encoding profile value to its name. + * + * Results: + * Pointer to the name or NULL on failure. Caller must not make + * not modify the string and must make a copy to hold on to it. + * + * Side effects: + * None. + *------------------------------------------------------------------------ + */ +const char * +TclEncodingProfileIdToName( + Tcl_Interp *interp, /* For error messages. May be NULL */ + int profileValue) /* Profile #define value */ +{ + int i; + + for (i = 0; i < sizeof(encodingProfiles) / sizeof(encodingProfiles[0]); ++i) { + if (profileValue == encodingProfiles[i].value) { + return encodingProfiles[i].name; + } + } + if (interp) { + Tcl_SetObjResult( + interp, + Tcl_ObjPrintf( + "Internal error. Bad profile id \"%d\".", + profileValue)); + Tcl_SetErrorCode( + interp, "TCL", "ENCODING", "PROFILEID", NULL); + } + return NULL; +} /* *------------------------------------------------------------------------ @@ -4179,6 +4230,7 @@ int TclEncodingExternalFlagsToInternal(int flags) case TCL_ENCODING_PROFILE_STRICT: flags |= TCL_ENCODING_STRICT; break; + case 0: /* Unspecified by caller */ default: /* TODO - clean this up once default mechanisms settled */ TCL_ENCODING_PROFILE_SET(flags, TCL_ENCODING_PROFILE_DEFAULT); diff --git a/generic/tclIO.c b/generic/tclIO.c index 0152740..49f4257 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -1700,8 +1700,12 @@ Tcl_CreateChannel( } statePtr->inputEncodingState = NULL; statePtr->inputEncodingFlags = TCL_ENCODING_START; + TCL_ENCODING_PROFILE_SET(statePtr->inputEncodingFlags, + TCL_ENCODING_PROFILE_DEFAULT); statePtr->outputEncodingState = NULL; statePtr->outputEncodingFlags = TCL_ENCODING_START; + TCL_ENCODING_PROFILE_SET(statePtr->outputEncodingFlags, + TCL_ENCODING_PROFILE_DEFAULT); /* * Set the channel up initially in AUTO input translation mode to accept @@ -4394,21 +4398,6 @@ Write( } /* - * Transfer encoding nocomplain/strict option to the encoding flags - */ - - if (GotFlag(statePtr, CHANNEL_ENCODING_STRICT)) { - statePtr->outputEncodingFlags |= TCL_ENCODING_STRICT; -#ifdef TCL_NO_DEPRECATED - } else if (GotFlag(statePtr, CHANNEL_ENCODING_NOCOMPLAIN)) { - statePtr->outputEncodingFlags &= ~TCL_ENCODING_STRICT; - statePtr->outputEncodingFlags |= TCL_ENCODING_NOCOMPLAIN; -#endif - } else { - statePtr->outputEncodingFlags &= ~TCL_ENCODING_STRICT; - } - - /* * Write the terminated escape sequence even if srcLen is 0. */ @@ -4733,21 +4722,6 @@ Tcl_GetsObj( } /* - * Transfer encoding nocomplain/strict option to the encoding flags - */ - - if (GotFlag(statePtr, CHANNEL_ENCODING_STRICT)) { - statePtr->inputEncodingFlags |= TCL_ENCODING_STRICT; -#ifdef TCL_NO_DEPRECATED - } else if (GotFlag(statePtr, CHANNEL_ENCODING_NOCOMPLAIN)) { - statePtr->inputEncodingFlags &= ~TCL_ENCODING_STRICT; - statePtr->inputEncodingFlags |= TCL_ENCODING_NOCOMPLAIN; -#endif - } else { - statePtr->inputEncodingFlags &= ~TCL_ENCODING_STRICT; - } - - /* * Object used by FilterInputBytes to keep track of how much data has been * consumed from the channel buffers. */ @@ -5528,21 +5502,6 @@ FilterInputBytes( } gsPtr->state = statePtr->inputEncodingState; - /* - * Transfer encoding nocomplain/strict option to the encoding flags - */ - - if (GotFlag(statePtr, CHANNEL_ENCODING_STRICT)) { - statePtr->inputEncodingFlags |= TCL_ENCODING_STRICT; -#ifdef TCL_NO_DEPRECATED - } else if (GotFlag(statePtr, CHANNEL_ENCODING_NOCOMPLAIN)) { - statePtr->inputEncodingFlags &= ~TCL_ENCODING_STRICT; - statePtr->inputEncodingFlags |= TCL_ENCODING_NOCOMPLAIN; -#endif - } else { - statePtr->inputEncodingFlags &= ~TCL_ENCODING_STRICT; - } - result = Tcl_ExternalToUtf(NULL, gsPtr->encoding, raw, rawLen, statePtr->inputEncodingFlags | TCL_ENCODING_NO_TERMINATE, &statePtr->inputEncodingState, dst, spaceLeft, &gsPtr->rawRead, @@ -6349,21 +6308,6 @@ ReadChars( } /* - * Transfer encoding nocomplain/strict option to the encoding flags - */ - - if (GotFlag(statePtr, CHANNEL_ENCODING_STRICT)) { - statePtr->inputEncodingFlags |= TCL_ENCODING_STRICT; -#ifdef TCL_NO_DEPRECATED - } else if (GotFlag(statePtr, CHANNEL_ENCODING_NOCOMPLAIN)) { - statePtr->inputEncodingFlags &= ~TCL_ENCODING_STRICT; - statePtr->inputEncodingFlags |= TCL_ENCODING_NOCOMPLAIN; -#endif - } else { - statePtr->inputEncodingFlags &= ~TCL_ENCODING_STRICT; - } - - /* * This routine is burdened with satisfying several constraints. It cannot * append more than 'charsToRead` chars onto objPtr. This is measured * after encoding and translation transformations are completed. There is @@ -8065,16 +8009,18 @@ Tcl_GetChannelOption( } } if (len == 0 || HaveOpt(1, "-encodingprofile")) { + int profile; + const char *profileName; if (len == 0) { Tcl_DStringAppendElement(dsPtr, "-encodingprofile"); } - if (flags & CHANNEL_ENCODING_STRICT) { - Tcl_DStringAppendElement(dsPtr, "strict"); - } else if (flags & CHANNEL_ENCODING_NOCOMPLAIN) { - Tcl_DStringAppendElement(dsPtr, "tcl8"); - } else { - Tcl_DStringAppendElement(dsPtr, ""); + /* Note currently input and output profiles are same */ + profile = TCL_ENCODING_PROFILE_GET(statePtr->inputEncodingFlags); + profileName = TclEncodingProfileIdToName(interp, profile); + if (profileName == NULL) { + return TCL_ERROR; } + Tcl_DStringAppendElement(dsPtr, profileName); if (len > 0) { return TCL_OK; } @@ -8293,6 +8239,7 @@ Tcl_SetChannelOption( return TCL_OK; } else if (HaveOpt(2, "-encoding")) { Tcl_Encoding encoding; + int profile; if ((newValue[0] == '\0') || (strcmp(newValue, "binary") == 0)) { encoding = NULL; @@ -8317,9 +8264,12 @@ Tcl_SetChannelOption( Tcl_FreeEncoding(statePtr->encoding); statePtr->encoding = encoding; statePtr->inputEncodingState = NULL; + profile = TCL_ENCODING_PROFILE_GET(statePtr->inputEncodingFlags); statePtr->inputEncodingFlags = TCL_ENCODING_START; + TCL_ENCODING_PROFILE_SET(statePtr->inputEncodingFlags, profile); statePtr->outputEncodingState = NULL; statePtr->outputEncodingFlags = TCL_ENCODING_START; + TCL_ENCODING_PROFILE_SET(statePtr->outputEncodingFlags, profile); /* Same as input */ ResetFlag(statePtr, CHANNEL_NEED_MORE_DATA|CHANNEL_ENCODING_ERROR); UpdateInterest(chanPtr); return TCL_OK; @@ -8380,28 +8330,11 @@ Tcl_SetChannelOption( return TCL_OK; } else if (HaveOpt(1, "-encodingprofile")) { int profile; - if (TclEncodingProfileParseName(interp, newValue, &profile) != TCL_OK) { + if (TclEncodingProfileNameToId(interp, newValue, &profile) != TCL_OK) { return TCL_ERROR; } - switch (profile) { - case TCL_ENCODING_PROFILE_TCL8: - ResetFlag(statePtr, CHANNEL_ENCODING_STRICT); - SetFlag(statePtr, CHANNEL_ENCODING_NOCOMPLAIN); - break; - case TCL_ENCODING_PROFILE_STRICT: - ResetFlag(statePtr, CHANNEL_ENCODING_NOCOMPLAIN); - SetFlag(statePtr, CHANNEL_ENCODING_STRICT); - break; - /* TODO - clean up this DEFAULT handling once channel flags fixed */ -#if TCL_ENCODING_PROFILE_DEFAULT != TCL_ENCODING_PROFILE_TCL8 \ - && TCL_ENCODING_PROFILE_DEFAULT != TCL_ENCODING_PROFILE_STRICT - case TCL_ENCODING_PROFILE_DEFAULT: /* FALLTHRU */ -#endif - default: - ResetFlag(statePtr, CHANNEL_ENCODING_NOCOMPLAIN); - ResetFlag(statePtr, CHANNEL_ENCODING_STRICT); - break; - } + TCL_ENCODING_PROFILE_SET(statePtr->inputEncodingFlags, profile); + TCL_ENCODING_PROFILE_SET(statePtr->outputEncodingFlags, profile); ResetFlag(statePtr, CHANNEL_NEED_MORE_DATA|CHANNEL_ENCODING_ERROR); return TCL_OK; } else if (HaveOpt(1, "-translation")) { @@ -9493,12 +9426,17 @@ TclCopyChannel( * of the bytes themselves. */ + /* + * TODO - should really only allow lossless profiles. Below reflects + * Tcl 8.7 alphas prior to encoding profiles + */ + moveBytes = inStatePtr->inEofChar == '\0' /* No eofChar to stop input */ && inStatePtr->inputTranslation == TCL_TRANSLATE_LF && outStatePtr->outputTranslation == TCL_TRANSLATE_LF && inStatePtr->encoding == outStatePtr->encoding - && (inStatePtr->flags & TCL_ENCODING_STRICT) != TCL_ENCODING_STRICT - && outStatePtr->flags & TCL_ENCODING_NOCOMPLAIN; + && TCL_ENCODING_PROFILE_GET(inStatePtr->flags) != TCL_ENCODING_PROFILE_STRICT + && TCL_ENCODING_PROFILE_GET(outStatePtr->flags) == TCL_ENCODING_PROFILE_TCL8; /* * Allocate a new CopyState to maintain info about the current copy in @@ -9826,8 +9764,8 @@ CopyData( inBinary = (inStatePtr->encoding == NULL); outBinary = (outStatePtr->encoding == NULL); sameEncoding = inStatePtr->encoding == outStatePtr->encoding - && (inStatePtr->flags & TCL_ENCODING_STRICT) != TCL_ENCODING_STRICT - && outStatePtr->flags & TCL_ENCODING_NOCOMPLAIN; + && TCL_ENCODING_PROFILE_GET(inStatePtr->flags) != TCL_ENCODING_PROFILE_STRICT + && TCL_ENCODING_PROFILE_GET(outStatePtr->flags) == TCL_ENCODING_PROFILE_TCL8; if (!(inBinary || sameEncoding)) { TclNewObj(bufObj); diff --git a/generic/tclIO.h b/generic/tclIO.h index a69e990..3f2feee 100644 --- a/generic/tclIO.h +++ b/generic/tclIO.h @@ -275,16 +275,17 @@ typedef struct ChannelState { * encountered an encoding error */ #define CHANNEL_RAW_MODE (1<<16) /* When set, notes that the Raw API is * being used. */ +#ifdef APN #define CHANNEL_ENCODING_NOCOMPLAIN (1<<17) /* set if option * -nocomplainencoding is set to 1 */ #define CHANNEL_ENCODING_STRICT (1<<18) /* set if option * -strictencoding is set to 1 */ +#endif #define CHANNEL_INCLOSE (1<<19) /* Channel is currently being closed. * Its structures are still live and * usable, but it may not be closed * again from within the close * handler. */ -#define ENCODING_FAILINDEX (1<<20) /* Internal flag, fail on Invalid bytes only */ #define CHANNEL_CLOSEDWRITE (1<<21) /* Channel write side has been closed. * No further Tcl-level write IO on * the channel is allowed. */ diff --git a/generic/tclInt.h b/generic/tclInt.h index 2b491d6..4b6303d 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -2885,9 +2885,11 @@ MODULE_SCOPE TclPlatformType tclPlatform; MODULE_SCOPE Tcl_Encoding tclIdentityEncoding; MODULE_SCOPE int -TclEncodingProfileParseName(Tcl_Interp *interp, - const char *profileName, - int *profilePtr); +TclEncodingProfileNameToId(Tcl_Interp *interp, + const char *profileName, + int *profilePtr); +MODULE_SCOPE const char *TclEncodingProfileIdToName(Tcl_Interp *interp, + int profileId); MODULE_SCOPE int TclEncodingExternalFlagsToInternal(int flags); /* -- cgit v0.12 From bab9170bdca67622ada57df9a0e7f55c5ac92b2f Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Fri, 10 Feb 2023 20:22:07 +0000 Subject: Proposed fix (and testcases) for [4a7397e0b3]: Tcl 9: fcopy with -strictencoding 1 UTF-8 channels --- generic/tclIO.c | 8 +++++ tests/io.test | 97 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 105 insertions(+) diff --git a/generic/tclIO.c b/generic/tclIO.c index fed469c..2e0cd1f 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -9820,12 +9820,20 @@ CopyData( Tcl_SetErrno(inStatePtr->unreportedError); inStatePtr->unreportedError = 0; goto readError; + } else if (inStatePtr->flags & CHANNEL_ENCODING_ERROR) { + Tcl_SetErrno(EILSEQ); + inStatePtr->flags &= ~CHANNEL_ENCODING_ERROR; + goto readError; } Tcl_GetChannelError(outChan, &msg); if ((outStatePtr->unreportedError != 0) || (msg != NULL)) { Tcl_SetErrno(outStatePtr->unreportedError); outStatePtr->unreportedError = 0; goto writeError; + } else if (outStatePtr->flags & CHANNEL_ENCODING_ERROR) { + Tcl_SetErrno(EILSEQ); + outStatePtr->flags &= ~CHANNEL_ENCODING_ERROR; + goto writeError; } if (cmdPtr && (mask == 0)) { diff --git a/tests/io.test b/tests/io.test index 2708906..7b8182e 100644 --- a/tests/io.test +++ b/tests/io.test @@ -7609,6 +7609,103 @@ test io-52.19 {coverage of eofChar handling} { close $out file size $path(test2) } 8 +test io-52.20 {TclCopyChannel & encodings} -setup { + set out [open $path(utf8-fcopy.txt) w] + fconfigure $out -encoding utf-8 -translation lf + puts $out "Á" + close $out +} -constraints {fcopy} -body { + # binary to encoding => the input has to be + # in utf-8 to make sense to the encoder + + set in [open $path(utf8-fcopy.txt) r] + set out [open $path(kyrillic.txt) w] + + # Using "-encoding ascii" means reading the "Á" gives an error + fconfigure $in -encoding ascii -strictencoding 1 + fconfigure $out -encoding koi8-r -translation lf + + fcopy $in $out +} -cleanup { + close $in + close $out +} -returnCodes 1 -match glob -result {error reading "file*": illegal byte sequence} +test io-52.21 {TclCopyChannel & encodings} -setup { + set out [open $path(utf8-fcopy.txt) w] + fconfigure $out -encoding utf-8 -translation lf + puts $out "Á" + close $out +} -constraints {fcopy} -body { + # binary to encoding => the input has to be + # in utf-8 to make sense to the encoder + + set in [open $path(utf8-fcopy.txt) r] + set out [open $path(kyrillic.txt) w] + + # Using "-encoding ascii" means writing the "Á" gives an error + fconfigure $in -encoding utf-8 + fconfigure $out -encoding ascii -translation lf -strictencoding 1 + + fcopy $in $out +} -cleanup { + close $in + close $out +} -returnCodes 1 -match glob -result {error writing "file*": illegal byte sequence} +test io-52.22 {TclCopyChannel & encodings} -setup { + set out [open $path(utf8-fcopy.txt) w] + fconfigure $out -encoding utf-8 -translation lf + puts $out "Á" + close $out +} -constraints {fcopy} -body { + # binary to encoding => the input has to be + # in utf-8 to make sense to the encoder + + set in [open $path(utf8-fcopy.txt) r] + set out [open $path(kyrillic.txt) w] + + # Using "-encoding ascii" means reading the "Á" gives an error + fconfigure $in -encoding ascii -strictencoding 1 + fconfigure $out -encoding koi8-r -translation lf + proc ::xxx args { + set ::s0 $args + } + + fcopy $in $out -command ::xxx + vwait ::s0 + set ::s0 +} -cleanup { + close $in + close $out + unset ::s0 +} -match glob -result {0 {error reading "file*": illegal byte sequence}} +test io-52.23 {TclCopyChannel & encodings} -setup { + set out [open $path(utf8-fcopy.txt) w] + fconfigure $out -encoding utf-8 -translation lf + puts $out "Á" + close $out +} -constraints {fcopy} -body { + # binary to encoding => the input has to be + # in utf-8 to make sense to the encoder + + set in [open $path(utf8-fcopy.txt) r] + set out [open $path(kyrillic.txt) w] + + # Using "-encoding ascii" means writing the "Á" gives an error + fconfigure $in -encoding utf-8 + fconfigure $out -encoding ascii -translation lf -strictencoding 1 + proc ::xxx args { + set ::s0 $args + } + + fcopy $in $out -command ::xxx + vwait ::s0 + set ::s0 +} -cleanup { + close $in + close $out + unset ::s0 +} -match glob -result {0 {error writing "file*": illegal byte sequence}} + test io-53.1 {CopyData} {fcopy} { file delete $path(test1) -- cgit v0.12 From c2f0e2f8da529b6bd9f8793a07e73ed1bb6eb903 Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Sat, 11 Feb 2023 01:51:32 +0000 Subject: Eliminate TCL_ENCODING_{STRICT,NOCOMPLAIN} --- generic/tcl.h | 12 ++---------- generic/tclEncoding.c | 37 ++++++++----------------------------- generic/tclIO.h | 6 ------ 3 files changed, 10 insertions(+), 45 deletions(-) diff --git a/generic/tcl.h b/generic/tcl.h index ec94e71..b7d31aa 100644 --- a/generic/tcl.h +++ b/generic/tcl.h @@ -2127,14 +2127,8 @@ typedef struct Tcl_EncodingType { * 0x00. Only valid for "utf-8" and "cesu-8". * This flag is implicit for external -> internal conversions, * optional for internal -> external conversions. - * TCL_ENCODING_NOCOMPLAIN - If set, the converter - * substitutes the problematic character(s) with - * one or more "close" characters in the - * destination buffer and then continues to - * convert the source. If clear, the converter returns - * immediately upon encountering an invalid byte sequence - * or a source character that has no mapping in the - * target encoding. Only for Tcl 9.x. + * TCL_ENCODING_PROFILE_* - Mutually exclusive encoding profile ids. Note + * these are bit masks. */ #define TCL_ENCODING_START 0x01 @@ -2143,8 +2137,6 @@ typedef struct Tcl_EncodingType { #define TCL_ENCODING_NO_TERMINATE 0x08 #define TCL_ENCODING_CHAR_LIMIT 0x10 #define TCL_ENCODING_MODIFIED 0x20 -#define TCL_ENCODING_NOCOMPLAIN 0x40 -#define TCL_ENCODING_STRICT 0x44 /* Reserve top byte for profile values (disjoint) */ #define TCL_ENCODING_PROFILE_TCL8 0x01000000 #define TCL_ENCODING_PROFILE_STRICT 0x02000000 diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 153f8d3..85c2b6a 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -574,7 +574,7 @@ TclInitEncodingSubsystem(void) type.nullSize = 1; type.clientData = INT2PTR(TCL_ENCODING_UTF); Tcl_CreateEncoding(&type); - type.clientData = INT2PTR(TCL_ENCODING_NOCOMPLAIN); + type.clientData = INT2PTR(0); type.encodingName = "cesu-8"; Tcl_CreateEncoding(&type); @@ -583,13 +583,13 @@ TclInitEncodingSubsystem(void) type.freeProc = NULL; type.nullSize = 2; type.encodingName = "ucs-2le"; - type.clientData = INT2PTR(TCL_ENCODING_LE|TCL_ENCODING_NOCOMPLAIN); + type.clientData = INT2PTR(TCL_ENCODING_LE); Tcl_CreateEncoding(&type); type.encodingName = "ucs-2be"; - type.clientData = INT2PTR(TCL_ENCODING_NOCOMPLAIN); + type.clientData = INT2PTR(0); Tcl_CreateEncoding(&type); type.encodingName = "ucs-2"; - type.clientData = INT2PTR(isLe.c|TCL_ENCODING_NOCOMPLAIN); + type.clientData = INT2PTR(isLe.c); Tcl_CreateEncoding(&type); type.toUtfProc = Utf32ToUtfProc; @@ -2324,16 +2324,11 @@ BinaryProc( *------------------------------------------------------------------------- */ -#ifdef OBSOLETE -#if TCL_MAJOR_VERSION > 8 || defined(TCL_NO_DEPRECATED) -# define STOPONERROR (!(flags & TCL_ENCODING_NOCOMPLAIN) || (flags & TCL_ENCODING_STOPONERROR)) -#else -# define STOPONERROR (flags & TCL_ENCODING_STOPONERROR) -#endif -#endif - +#define STRICT_PROFILE(flags_) \ + ((TCL_ENCODING_PROFILE_GET(flags_) == TCL_ENCODING_PROFILE_STRICT) \ + || (TCL_ENCODING_PROFILE_GET(flags_) == 0 \ + && TCL_ENCODING_PROFILE_DEFAULT == TCL_ENCODING_PROFILE_STRICT)) -#define STRICT_PROFILE(flags_) (TCL_ENCODING_PROFILE_GET(flags_) == TCL_ENCODING_PROFILE_STRICT) #define STOPONERROR STRICT_PROFILE(flags) static int @@ -4196,10 +4191,6 @@ TclEncodingProfileIdToName( * * Maps the flags supported in the encoding C API's to internal flags. * - * TCL_ENCODING_STRICT and TCL_ENCODING_NOCOMPLAIN are masked off - * because they are for internal use only and externally specified - * through TCL_ENCODING_PROFILE_* bits. - * * For backward compatibility reasons, TCL_ENCODING_STOPONERROR is * is mapped to the TCL_ENCODING_PROFILE_STRICT overwriting any profile * specified. @@ -4217,7 +4208,6 @@ TclEncodingProfileIdToName( */ int TclEncodingExternalFlagsToInternal(int flags) { - flags &= ~(TCL_ENCODING_STRICT | TCL_ENCODING_NOCOMPLAIN); if (flags & TCL_ENCODING_STOPONERROR) { TCL_ENCODING_PROFILE_SET(flags, TCL_ENCODING_PROFILE_STRICT); } @@ -4225,22 +4215,11 @@ int TclEncodingExternalFlagsToInternal(int flags) int profile = TCL_ENCODING_PROFILE_GET(flags); switch (profile) { case TCL_ENCODING_PROFILE_TCL8: - flags |= TCL_ENCODING_NOCOMPLAIN; - break; case TCL_ENCODING_PROFILE_STRICT: - flags |= TCL_ENCODING_STRICT; break; case 0: /* Unspecified by caller */ default: - /* TODO - clean this up once default mechanisms settled */ TCL_ENCODING_PROFILE_SET(flags, TCL_ENCODING_PROFILE_DEFAULT); -#if TCL_ENCODING_PROFILE_DEFAULT == TCL_ENCODING_PROFILE_TCL8 - flags |= TCL_ENCODING_NOCOMPLAIN; -#elif TCL_ENCODING_PROFILE_DEFAULT == TCL_ENCODING_PROFILE_STRICT - flags |= TCL_ENCODING_STRICT; -#else -#error TCL_ENCODING_PROFILE_DEFAULT must be TCL8 or STRICT -#endif break; } } diff --git a/generic/tclIO.h b/generic/tclIO.h index 3f2feee..dded07f 100644 --- a/generic/tclIO.h +++ b/generic/tclIO.h @@ -275,12 +275,6 @@ typedef struct ChannelState { * encountered an encoding error */ #define CHANNEL_RAW_MODE (1<<16) /* When set, notes that the Raw API is * being used. */ -#ifdef APN -#define CHANNEL_ENCODING_NOCOMPLAIN (1<<17) /* set if option - * -nocomplainencoding is set to 1 */ -#define CHANNEL_ENCODING_STRICT (1<<18) /* set if option - * -strictencoding is set to 1 */ -#endif #define CHANNEL_INCLOSE (1<<19) /* Channel is currently being closed. * Its structures are still live and * usable, but it may not be closed -- cgit v0.12 From 727887b6dc02960e49117cb5db99e44806a0327f Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Sat, 11 Feb 2023 17:38:07 +0000 Subject: Partial implementation of replace profile --- generic/tcl.h | 7 +-- generic/tclEncoding.c | 119 +++++++++++++++++++++++++++++++++++++++----------- tests/cmdAH.test | 3 ++ 3 files changed, 99 insertions(+), 30 deletions(-) diff --git a/generic/tcl.h b/generic/tcl.h index b7d31aa..3fc53db 100644 --- a/generic/tcl.h +++ b/generic/tcl.h @@ -2140,6 +2140,7 @@ typedef struct Tcl_EncodingType { /* Reserve top byte for profile values (disjoint) */ #define TCL_ENCODING_PROFILE_TCL8 0x01000000 #define TCL_ENCODING_PROFILE_STRICT 0x02000000 +#define TCL_ENCODING_PROFILE_REPLACE 0x03000000 #define TCL_ENCODING_PROFILE_MASK 0xFF000000 #define TCL_ENCODING_PROFILE_GET(flags_) ((flags_) & TCL_ENCODING_PROFILE_MASK) #define TCL_ENCODING_PROFILE_SET(flags_, profile_) \ @@ -2151,13 +2152,9 @@ typedef struct Tcl_EncodingType { #if TCL_MAJOR_VERSION < 9 #define TCL_ENCODING_PROFILE_DEFAULT TCL_ENCODING_PROFILE_TCL8 #else -#define TCL_ENCODING_PROFILE_DEFAULT TCL_ENCODING_PROFILE_TCL8 /* STRICT? TODO */ +#define TCL_ENCODING_PROFILE_DEFAULT TCL_ENCODING_PROFILE_TCL8 /* STRICT? REPLACE? TODO */ #endif -#define TCL_ENCODING_EXTERNAL_FLAG_MASK \ - (TCL_ENCODING_START|TCL_ENCODING_END|TCL_ENCODING_STOPONERROR) - - /* * The following definitions are the error codes returned by the conversion * routines: diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 85c2b6a..bb1f32f 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -193,8 +193,12 @@ Tcl_Encoding tclIdentityEncoding = NULL; static struct TclEncodingProfiles { const char *name; int value; -} encodingProfiles[] = {{"tcl8", TCL_ENCODING_PROFILE_TCL8}, - {"strict", TCL_ENCODING_PROFILE_STRICT}}; +} encodingProfiles[] = { + {"tcl8", TCL_ENCODING_PROFILE_TCL8}, + {"strict", TCL_ENCODING_PROFILE_STRICT}, + {"replace", TCL_ENCODING_PROFILE_REPLACE}, +}; +#define UNICODE_REPLACE_CHAR 0xFFFD /* * The following variable is used in the sparse matrix code for a @@ -2336,7 +2340,7 @@ UtfToUtfProc( void *clientData, /* additional flags, e.g. TCL_ENCODING_MODIFIED */ const char *src, /* Source string in UTF-8. */ int srcLen, /* Source string length in bytes. */ - int flags, /* Conversion control flags. */ + int flags, /* TCL_ENCODING_* conversion control flags. */ TCL_UNUSED(Tcl_EncodingState *), char *dst, /* Output buffer in which converted string is * stored. */ @@ -2376,6 +2380,8 @@ UtfToUtfProc( dstEnd = dst + dstLen - ((flags & TCL_ENCODING_UTF) ? TCL_UTF_MAX : 6); for (numChars = 0; src < srcEnd && numChars <= charLimit; numChars++) { + int profile = TCL_ENCODING_PROFILE_GET(flags); + if ((src > srcClose) && (!Tcl_UtfCharComplete(src, srcEnd - src))) { /* * If there is more string to follow, this will ensure that the @@ -2389,34 +2395,51 @@ UtfToUtfProc( result = TCL_CONVERT_NOSPACE; break; } - if (UCHAR(*src) < 0x80 && !((UCHAR(*src) == 0) && (flags & TCL_ENCODING_MODIFIED))) { + /* + * TCL_ENCODING_MODIFIED is set when the target encoding is Tcl's + * internal UTF-8 modified version. + */ + if (UCHAR(*src) < 0x80 + && !((UCHAR(*src) == 0) && (flags & TCL_ENCODING_MODIFIED))) { /* - * Copy 7bit characters, but skip null-bytes when we are in input - * mode, so that they get converted to 0xC080. + * Copy 7bit characters, but skip null-bytes when target encoding + * is Tcl's "modified" UTF-8. These need to be converted to + * \xC0\x80 as is done in a later branch. */ *dst++ = *src++; - } else if ((UCHAR(*src) == 0xC0) && - (src + 1 < srcEnd) && - (UCHAR(src[1]) == 0x80) && - (!(flags & TCL_ENCODING_MODIFIED) - || (STRICT_PROFILE(flags)))) { + } + else if ((UCHAR(*src) == 0xC0) && (src + 1 < srcEnd) + && (UCHAR(src[1]) == 0x80) + && (!(flags & TCL_ENCODING_MODIFIED) + || (profile == TCL_ENCODING_PROFILE_STRICT))) { /* - * If in input mode, and -strict or -failindex is specified: This is an error. + * \xC0\x80 and either strict profile or target is "real" UTF-8 + * - Strict profile - error + * - Non-strict, real UTF-8 - output \x00 */ if (flags & TCL_ENCODING_MODIFIED) { + /* + * TODO - should above check not be against STRICT? + * That would probably break a convertto command that goes + * from the internal UTF8 to the real UTF8. On the other + * hand this means, a strict UTF8->UTF8 transform is not + * possible using this function. + */ result = TCL_CONVERT_SYNTAX; break; } /* - * Convert 0xC080 to real nulls when we are in output mode, with or without '-strict'. + * Convert 0xC080 to real nulls when we are in output mode, + * irrespective of the profile. */ *dst++ = 0; src += 2; } else if (!Tcl_UtfCharComplete(src, srcEnd - src)) { /* + * Incomplete byte sequence. * Always check before using TclUtfToUCS4. Not doing can so * cause it run beyond the end of the buffer! If we happen such an * incomplete char its bytes are made to represent themselves @@ -2424,17 +2447,39 @@ UtfToUtfProc( */ if (flags & TCL_ENCODING_MODIFIED) { - if ((STOPONERROR) && (flags & TCL_ENCODING_CHAR_LIMIT)) { - result = TCL_CONVERT_MULTIBYTE; + /* Incomplete bytes for modified UTF-8 target */ + if (profile == TCL_ENCODING_PROFILE_STRICT) { + result = (flags & TCL_ENCODING_CHAR_LIMIT) + ? TCL_CONVERT_MULTIBYTE + : TCL_CONVERT_SYNTAX; break; } - if (STRICT_PROFILE(flags)) { - result = TCL_CONVERT_SYNTAX; - break; + if (profile == TCL_ENCODING_PROFILE_REPLACE) { + ch = UNICODE_REPLACE_CHAR; + } else { + /* TCL_ENCODING_PROFILE_TCL8 */ + ch = UCHAR(*src); } - ch = UCHAR(*src++); - } else { + ++src; + } + else { + /* + * Incomplete bytes for real UTF-8 target. + * TODO - no profile check here because did not have any + * checks in the pre-profile code. Why? Is it because on + * output a valid internal utf-8 stream is assumed? + */ char chbuf[2]; + /* + * TODO - this code seems broken to me. + * - it does not check profiles + * - generates invalid output for real UTF-8 target + * (consider \xC2) + * A possible explanation is this behavior matches the + * Tcl8 decoding behavior of mapping invalid bytes to the same + * code point value. Still, at least strictness checks should + * be made. + */ chbuf[0] = UCHAR(*src++); chbuf[1] = 0; TclUtfToUCS4(chbuf, &ch); } @@ -2444,11 +2489,31 @@ UtfToUtfProc( int low; const char *saveSrc = src; size_t len = TclUtfToUCS4(src, &ch); + + /* + * Valid single char encodings were already handled earlier. + * So len==1 means an invalid byte that is magically transformed + * to a code point unless it resulted from the special + * \xC0\x80 sequence. Tests io-75.* + * TODO - below check could be simplified to remove the MODIFIED + * expression I think given the checks already made above. May be. + */ +#if 0 if ((len < 2) && (ch != 0) && (flags & TCL_ENCODING_MODIFIED) - && STRICT_PROFILE(flags)) { + && (profile == TCL_ENCODING_PROFILE_STRICT)) { result = TCL_CONVERT_SYNTAX; break; } +#else + if ((len < 2) && (ch != 0) && (flags & TCL_ENCODING_MODIFIED)) { + if (profile == TCL_ENCODING_PROFILE_STRICT) { + result = TCL_CONVERT_SYNTAX; + break; + } else if (profile == TCL_ENCODING_PROFILE_REPLACE) { + ch = UNICODE_REPLACE_CHAR; + } + } +#endif src += len; if (!(flags & TCL_ENCODING_UTF) && (ch > 0x3FF)) { if (ch > 0xFFFF) { @@ -2464,13 +2529,14 @@ UtfToUtfProc( /* * A surrogate character is detected, handle especially. */ + /* TODO - what about REPLACE profile? */ low = ch; len = (src <= srcEnd-3) ? TclUtfToUCS4(src, &low) : 0; if (((low & ~0x3FF) != 0xDC00) || (ch & 0x400)) { - if (STOPONERROR) { + if (profile == TCL_ENCODING_PROFILE_STRICT) { result = TCL_CONVERT_UNKNOWN; src = saveSrc; break; @@ -2484,12 +2550,14 @@ UtfToUtfProc( src += len; dst += Tcl_UniCharToUtf(ch, dst); ch = low; - } else if (STOPONERROR && !(flags & TCL_ENCODING_MODIFIED) && (((ch & ~0x7FF) == 0xD800))) { + } else if ((profile == TCL_ENCODING_PROFILE_STRICT) && + !(flags & TCL_ENCODING_MODIFIED) && + (((ch & ~0x7FF) == 0xD800))) { result = TCL_CONVERT_UNKNOWN; src = saveSrc; break; - } else if (STRICT_PROFILE(flags) && - (flags & TCL_ENCODING_MODIFIED) && + } else if ((profile == TCL_ENCODING_PROFILE_STRICT) && + (flags & TCL_ENCODING_MODIFIED) && ((ch & ~0x7FF) == 0xD800)) { result = TCL_CONVERT_SYNTAX; src = saveSrc; @@ -4216,6 +4284,7 @@ int TclEncodingExternalFlagsToInternal(int flags) switch (profile) { case TCL_ENCODING_PROFILE_TCL8: case TCL_ENCODING_PROFILE_STRICT: + case TCL_ENCODING_PROFILE_REPLACE: break; case 0: /* Unspecified by caller */ default: diff --git a/tests/cmdAH.test b/tests/cmdAH.test index c4053a2..52e7ac3 100644 --- a/tests/cmdAH.test +++ b/tests/cmdAH.test @@ -228,6 +228,9 @@ set encInvalidBytes { utf-8 A\xed\xb0\x80B default A\uDC00B -1 Low-surrogate utf-8 A\xed\xb0\x80B tcl8 A\uDC00B -1 Low-surrogate utf-8 A\xed\xb0\x80B strict A 1 Low-surrogate + utf-8 \xed\xa0\x80\xed\xb0\x80 default \U00010000 -1 High-low-surrogate + utf-8 \xed\xa0\x80\xed\xb0\x80 tcl8 \U00010000 -1 High-low-surrogate + utf-8 \xed\xa0\x80\xed\xb0\x80 strict \U00010000 0 High-low-surrogate utf-32le \x00\xD8\x00\x00 default \uD800 -1 {High-surrogate} utf-32le \x00\xD8\x00\x00 tcl8 \uD800 -1 {High-surrogate} -- cgit v0.12 From b5095134dfebce7a33739c75d6533d90862901e3 Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Sun, 12 Feb 2023 06:15:59 +0000 Subject: Minor readability changes --- generic/tclEncoding.c | 101 ++++++++++++++++++++++++++++++++------------------ tests/cmdAH.test | 2 +- 2 files changed, 65 insertions(+), 38 deletions(-) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index bb1f32f..d2f3551 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -198,7 +198,20 @@ static struct TclEncodingProfiles { {"strict", TCL_ENCODING_PROFILE_STRICT}, {"replace", TCL_ENCODING_PROFILE_REPLACE}, }; +#define PROFILE_STRICT(flags_) \ + ((TCL_ENCODING_PROFILE_GET(flags_) == TCL_ENCODING_PROFILE_STRICT) \ + || (TCL_ENCODING_PROFILE_GET(flags_) == 0 \ + && TCL_ENCODING_PROFILE_DEFAULT == TCL_ENCODING_PROFILE_STRICT)) + +#define PROFILE_REPLACE(flags_) \ + ((TCL_ENCODING_PROFILE_GET(flags_) == TCL_ENCODING_PROFILE_REPLACE) \ + || (TCL_ENCODING_PROFILE_GET(flags_) == 0 \ + && TCL_ENCODING_PROFILE_DEFAULT == TCL_ENCODING_PROFILE_REPLACE)) + #define UNICODE_REPLACE_CHAR 0xFFFD +#define SURROGATE(c_) (((c_) & ~0x7FF) == 0xD800) +#define HIGH_SURROGATE(c_) (((c_) & ~0x3FF) == 0xD800) +#define LOW_SURROGATE(c_) (((c_) & ~0x3FF) == 0xDC00) /* * The following variable is used in the sparse matrix code for a @@ -243,6 +256,7 @@ static Tcl_EncodingConvertProc UtfToUtfProc; static Tcl_EncodingConvertProc Iso88591FromUtfProc; static Tcl_EncodingConvertProc Iso88591ToUtfProc; + /* * A Tcl_ObjType for holding a cached Tcl_Encoding in the twoPtrValue.ptr1 field * of the internalrep. This should help the lifetime of encodings be more useful. @@ -2328,13 +2342,6 @@ BinaryProc( *------------------------------------------------------------------------- */ -#define STRICT_PROFILE(flags_) \ - ((TCL_ENCODING_PROFILE_GET(flags_) == TCL_ENCODING_PROFILE_STRICT) \ - || (TCL_ENCODING_PROFILE_GET(flags_) == 0 \ - && TCL_ENCODING_PROFILE_DEFAULT == TCL_ENCODING_PROFILE_STRICT)) - -#define STOPONERROR STRICT_PROFILE(flags) - static int UtfToUtfProc( void *clientData, /* additional flags, e.g. TCL_ENCODING_MODIFIED */ @@ -2412,7 +2419,7 @@ UtfToUtfProc( else if ((UCHAR(*src) == 0xC0) && (src + 1 < srcEnd) && (UCHAR(src[1]) == 0x80) && (!(flags & TCL_ENCODING_MODIFIED) - || (profile == TCL_ENCODING_PROFILE_STRICT))) { + || PROFILE_STRICT(profile))) { /* * \xC0\x80 and either strict profile or target is "real" UTF-8 * - Strict profile - error @@ -2448,13 +2455,13 @@ UtfToUtfProc( if (flags & TCL_ENCODING_MODIFIED) { /* Incomplete bytes for modified UTF-8 target */ - if (profile == TCL_ENCODING_PROFILE_STRICT) { + if (PROFILE_STRICT(profile)) { result = (flags & TCL_ENCODING_CHAR_LIMIT) ? TCL_CONVERT_MULTIBYTE : TCL_CONVERT_SYNTAX; break; } - if (profile == TCL_ENCODING_PROFILE_REPLACE) { + if (PROFILE_REPLACE(profile)) { ch = UNICODE_REPLACE_CHAR; } else { /* TCL_ENCODING_PROFILE_TCL8 */ @@ -2506,10 +2513,10 @@ UtfToUtfProc( } #else if ((len < 2) && (ch != 0) && (flags & TCL_ENCODING_MODIFIED)) { - if (profile == TCL_ENCODING_PROFILE_STRICT) { + if (PROFILE_STRICT(profile)) { result = TCL_CONVERT_SYNTAX; break; - } else if (profile == TCL_ENCODING_PROFILE_REPLACE) { + } else if (PROFILE_REPLACE(profile)) { ch = UNICODE_REPLACE_CHAR; } } @@ -2534,9 +2541,9 @@ UtfToUtfProc( low = ch; len = (src <= srcEnd-3) ? TclUtfToUCS4(src, &low) : 0; - if (((low & ~0x3FF) != 0xDC00) || (ch & 0x400)) { + if ((!LOW_SURROGATE(low)) || (ch & 0x400)) { - if (profile == TCL_ENCODING_PROFILE_STRICT) { + if (PROFILE_STRICT(profile)) { result = TCL_CONVERT_UNKNOWN; src = saveSrc; break; @@ -2550,15 +2557,15 @@ UtfToUtfProc( src += len; dst += Tcl_UniCharToUtf(ch, dst); ch = low; - } else if ((profile == TCL_ENCODING_PROFILE_STRICT) && - !(flags & TCL_ENCODING_MODIFIED) && - (((ch & ~0x7FF) == 0xD800))) { + } else if (PROFILE_STRICT(profile) && + (!(flags & TCL_ENCODING_MODIFIED)) && + SURROGATE(ch)) { result = TCL_CONVERT_UNKNOWN; src = saveSrc; break; - } else if ((profile == TCL_ENCODING_PROFILE_STRICT) && + } else if (PROFILE_STRICT(profile) && (flags & TCL_ENCODING_MODIFIED) && - ((ch & ~0x7FF) == 0xD800)) { + SURROGATE(ch)) { result = TCL_CONVERT_SYNTAX; src = saveSrc; break; @@ -2649,12 +2656,15 @@ Utf32ToUtfProc( } else { ch = (src[0] & 0xFF) << 24 | (src[1] & 0xFF) << 16 | (src[2] & 0xFF) << 8 | (src[3] & 0xFF); } - if ((unsigned)ch > 0x10FFFF - || (STRICT_PROFILE(flags) && ((ch & ~0x7FF) == 0xD800))) { - if (STOPONERROR) { + + if ((unsigned)ch > 0x10FFFF || SURROGATE(ch)) { + if (PROFILE_STRICT(flags)) { result = TCL_CONVERT_SYNTAX; break; } + if (PROFILE_REPLACE(flags)) { + ch = UNICODE_REPLACE_CHAR; + } } /* @@ -2666,7 +2676,7 @@ Utf32ToUtfProc( *dst++ = (ch & 0xFF); } else { dst += Tcl_UniCharToUtf(ch, dst); - if ((ch & ~0x3FF) == 0xD800) { + if (HIGH_SURROGATE(ch)) { /* Bug [10c2c17c32]. If Hi surrogate, finish 3-byte UTF-8 */ dst += Tcl_UniCharToUtf(-1, dst); } @@ -2750,11 +2760,14 @@ UtfToUtf32Proc( break; } len = TclUtfToUCS4(src, &ch); - if ((ch & ~0x7FF) == 0xD800) { - if (STOPONERROR) { + if (SURROGATE(ch)) { + if (PROFILE_STRICT(flags)) { result = TCL_CONVERT_UNKNOWN; break; } + if (PROFILE_REPLACE(flags)) { + ch = UNICODE_REPLACE_CHAR; + } } src += len; if (flags & TCL_ENCODING_LE) { @@ -2952,11 +2965,14 @@ UtfToUtf16Proc( break; } len = TclUtfToUCS4(src, &ch); - if ((ch & ~0x7FF) == 0xD800) { - if (STOPONERROR) { + if (SURROGATE(ch)) { + if (PROFILE_STRICT(flags)) { result = TCL_CONVERT_UNKNOWN; break; } + if (PROFILE_REPLACE(flags)) { + ch = UNICODE_REPLACE_CHAR; + } } src += len; if (flags & TCL_ENCODING_LE) { @@ -3059,6 +3075,9 @@ UtfToUcs2Proc( result = TCL_CONVERT_NOSPACE; break; } + /* TODO - there were no STRICT or NOCOMPLAIN checks here (why?) + * so no profile checks either for now. */ + #if TCL_UTF_MAX < 4 src += (len = TclUtfToUniChar(src, &ch)); if ((ch >= 0xD800) && (len < 3)) { @@ -3163,23 +3182,30 @@ TableToUtfProc( if (prefixBytes[byte]) { src++; if (src >= srcEnd) { + /* + * TODO - this is broken. For consistency with other + * decoders, an error should be raised only if strict. + * However, doing that check cause a whole bunch of test + * failures. Need to verify if those tests are in fact + * correct. + */ src--; result = TCL_CONVERT_MULTIBYTE; break; } - ch = toUnicode[byte][*((unsigned char *) src)]; + ch = toUnicode[byte][*((unsigned char *)src)]; } else { ch = pageZero[byte]; } if ((ch == 0) && (byte != 0)) { - if (STOPONERROR) { + if (PROFILE_STRICT(flags)) { result = TCL_CONVERT_SYNTAX; break; } if (prefixBytes[byte]) { src--; } - ch = (Tcl_UniChar) byte; + ch = (Tcl_UniChar)byte; } /* @@ -3288,11 +3314,11 @@ TableFromUtfProc( word = fromUnicode[(ch >> 8)][ch & 0xFF]; if ((word == 0) && (ch != 0)) { - if (STOPONERROR) { + if (PROFILE_STRICT(flags)) { result = TCL_CONVERT_UNKNOWN; break; } - word = dataPtr->fallback; + word = dataPtr->fallback; /* Both profiles REPLACE and TCL8 */ } if (prefixBytes[(word >> 8)] != 0) { if (dst + 1 > dstEnd) { @@ -3476,7 +3502,7 @@ Iso88591FromUtfProc( || ((ch >= 0xD800) && (len < 3)) #endif ) { - if (STOPONERROR) { + if (PROFILE_STRICT(flags)) { result = TCL_CONVERT_UNKNOWN; break; } @@ -3489,7 +3515,7 @@ Iso88591FromUtfProc( * Plunge on, using '?' as a fallback character. */ - ch = (Tcl_UniChar) '?'; + ch = (Tcl_UniChar) '?'; /* Profiles TCL8 and REPLACE */ } if (dst > dstEnd) { @@ -3703,9 +3729,10 @@ EscapeToUtfProc( if ((checked == dataPtr->numSubTables + 2) || (flags & TCL_ENCODING_END)) { - if (!STOPONERROR) { + if (!PROFILE_STRICT(flags)) { /* - * Skip the unknown escape sequence. + * Skip the unknown escape sequence. TODO - bug? + * May be replace with UNICODE_REPLACE_CHAR? */ src += longest; @@ -3878,7 +3905,7 @@ EscapeFromUtfProc( if (word == 0) { state = oldState; - if (STOPONERROR) { + if (PROFILE_STRICT(flags)) { result = TCL_CONVERT_UNKNOWN; break; } diff --git a/tests/cmdAH.test b/tests/cmdAH.test index 52e7ac3..7b2d99f 100644 --- a/tests/cmdAH.test +++ b/tests/cmdAH.test @@ -179,7 +179,7 @@ set "numargErrors(encoding convertfrom)" {^wrong # args: should be "(encoding |: set "numargErrors(encoding convertto)" {^wrong # args: should be "(encoding |::tcl::encoding::)convertto \?\?-profile profile\? \?-failindex var\? \?encoding\?\? data"$} set "numargErrors(encoding names)" {wrong # args: should be "encoding names"} -set encProfiles {tcl8 strict} +set encProfiles {tcl8 strict replace} # TODO - valid sequences for different encodings - shiftjis etc. # Note utf-16, utf-32 missing because they are automatically -- cgit v0.12 From bf448a6421c4fd0340d6bba70aba3b0a713d049b Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Sun, 12 Feb 2023 11:04:16 +0000 Subject: Added 'encoding profiles' --- generic/tclEncoding.c | 31 ++++++++++++++++++++++++++++++- tests/cmdAH.test | 9 +++++++++ 2 files changed, 39 insertions(+), 1 deletion(-) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index d2f3551..e8e1756 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -4278,7 +4278,7 @@ TclEncodingProfileIdToName( } return NULL; } - + /* *------------------------------------------------------------------------ * @@ -4321,6 +4321,35 @@ int TclEncodingExternalFlagsToInternal(int flags) } return flags; } + +/* + *------------------------------------------------------------------------ + * + * TclGetEncodingProfiles -- + * + * Get the list of supported encoding profiles. + * + * Results: + * None. + * + * Side effects: + * The list of profile names is stored in the interpreter result. + * + *------------------------------------------------------------------------ + */ +void +TclGetEncodingProfiles(Tcl_Interp *interp) +{ + int i, n; + Tcl_Obj *objPtr; + n = sizeof(encodingProfiles) / sizeof(encodingProfiles[0]); + objPtr = Tcl_NewListObj(n, NULL); + for (i = 0; i < n; ++i) { + Tcl_ListObjAppendElement( + interp, objPtr, Tcl_NewStringObj(encodingProfiles[i].name, -1)); + } + Tcl_SetObjResult(interp, objPtr); +} /* * Local Variables: diff --git a/tests/cmdAH.test b/tests/cmdAH.test index 7b2d99f..c666513 100644 --- a/tests/cmdAH.test +++ b/tests/cmdAH.test @@ -178,6 +178,7 @@ set "numargErrors(encoding system)" {^wrong # args: should be "(encoding |::tcl: set "numargErrors(encoding convertfrom)" {^wrong # args: should be "(encoding |::tcl::encoding::)convertfrom \?\?-profile profile\? \?-failindex var\? \?encoding\?\? data"$} set "numargErrors(encoding convertto)" {^wrong # args: should be "(encoding |::tcl::encoding::)convertto \?\?-profile profile\? \?-failindex var\? \?encoding\?\? data"$} set "numargErrors(encoding names)" {wrong # args: should be "encoding names"} +set "numargErrors(encoding profiles)" {wrong # args: should be "encoding profiles"} set encProfiles {tcl8 strict replace} @@ -202,6 +203,7 @@ set encValidStrings { set encInvalidBytes { ascii \x41\xe9\x42 default A\u00E9B -1 {non-ASCII} ascii \x41\xe9\x42 tcl8 A\u00E9B -1 {non-ASCII} + ascii \x41\xe9\x42 replace A\uFFFDB -1 {non-ASCII} ascii \x41\xe9\x42 strict A 1 {non-ASCII} utf-8 \x41\xC0\x42 default A\u00C0B -1 C0 @@ -565,6 +567,13 @@ test cmdAH-4.5.2 {encoding names should include at least utf-8 and iso8859-1 and } -result {1 1 1} # +# encoding profiles 4.6.* +badnumargs cmdAH-4.6.1 {encoding profiles} {foo} +test cmdAH-4.6.2 {encoding profiles} -body { + lsort [encoding profiles] +} -result {replace strict tcl8} + +# # file command test cmdAH-5.1 {Tcl_FileObjCmd} -returnCodes error -body { -- cgit v0.12 From 0c764d2b03ab2b8daf95b3a25a470b56dffdad4f Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Sun, 12 Feb 2023 16:56:17 +0000 Subject: Minor fixes and tests --- generic/tclCmdAH.c | 30 ++++++++++++++++++++++++++++++ generic/tclEncoding.c | 22 ++++++++++------------ generic/tclInt.h | 1 + tests/cmdAH.test | 7 ++++++- tests/socket.test | 2 +- 5 files changed, 48 insertions(+), 14 deletions(-) diff --git a/generic/tclCmdAH.c b/generic/tclCmdAH.c index 5fbe27e..692c75b 100644 --- a/generic/tclCmdAH.c +++ b/generic/tclCmdAH.c @@ -51,6 +51,7 @@ static Tcl_ObjCmdProc EncodingConvertfromObjCmd; static Tcl_ObjCmdProc EncodingConverttoObjCmd; static Tcl_ObjCmdProc EncodingDirsObjCmd; static Tcl_ObjCmdProc EncodingNamesObjCmd; +static Tcl_ObjCmdProc EncodingProfilesObjCmd; static Tcl_ObjCmdProc EncodingSystemObjCmd; static inline int ForeachAssignments(Tcl_Interp *interp, struct ForeachState *statePtr); @@ -519,6 +520,7 @@ TclInitEncodingCmd( {"convertto", EncodingConverttoObjCmd, TclCompileBasic1To3ArgCmd, NULL, NULL, 0}, {"dirs", EncodingDirsObjCmd, TclCompileBasic0Or1ArgCmd, NULL, NULL, 1}, {"names", EncodingNamesObjCmd, TclCompileBasic0ArgCmd, NULL, NULL, 0}, + {"profiles", EncodingProfilesObjCmd, TclCompileBasic0ArgCmd, NULL, NULL, 0}, {"system", EncodingSystemObjCmd, TclCompileBasic0Or1ArgCmd, NULL, NULL, 1}, {NULL, NULL, NULL, NULL, NULL, 0} }; @@ -891,6 +893,34 @@ EncodingNamesObjCmd( /* *----------------------------------------------------------------------------- * + * EncodingProfilesObjCmd -- + * + * This command returns a list of the available encoding profiles + * + * Results: + * Returns a standard Tcl result + * + *----------------------------------------------------------------------------- + */ + +int +EncodingProfilesObjCmd( + TCL_UNUSED(void *), + Tcl_Interp* interp, /* Tcl interpreter */ + int objc, /* Number of command line args */ + Tcl_Obj* const objv[]) /* Vector of command line args */ +{ + if (objc > 1) { + Tcl_WrongNumArgs(interp, 1, objv, NULL); + return TCL_ERROR; + } + TclGetEncodingProfiles(interp); + return TCL_OK; +} + +/* + *----------------------------------------------------------------------------- + * * EncodingSystemObjCmd -- * * This command retrieves or changes the system encoding diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index e8e1756..fc3ac77 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -208,7 +208,7 @@ static struct TclEncodingProfiles { || (TCL_ENCODING_PROFILE_GET(flags_) == 0 \ && TCL_ENCODING_PROFILE_DEFAULT == TCL_ENCODING_PROFILE_REPLACE)) -#define UNICODE_REPLACE_CHAR 0xFFFD +#define UNICODE_REPLACE_CHAR ((Tcl_UniChar)0xFFFD) #define SURROGATE(c_) (((c_) & ~0x7FF) == 0xD800) #define HIGH_SURROGATE(c_) (((c_) & ~0x3FF) == 0xD800) #define LOW_SURROGATE(c_) (((c_) & ~0x3FF) == 0xDC00) @@ -547,6 +547,7 @@ FillEncodingFileMap(void) * TCL_ENCODING_LE is only used for utf-16/utf-32/ucs-2. re-use the same value */ #define TCL_ENCODING_LE TCL_ENCODING_MODIFIED /* Little-endian encoding */ #define TCL_ENCODING_UTF 0x200 /* For UTF-8 encoding, allow 4-byte output sequences */ +#define TCL_ENCODING_CESU8 0x400 /* TODO - Distinguishes cesu-8 from utf-8*/ void TclInitEncodingSubsystem(void) @@ -592,7 +593,7 @@ TclInitEncodingSubsystem(void) type.nullSize = 1; type.clientData = INT2PTR(TCL_ENCODING_UTF); Tcl_CreateEncoding(&type); - type.clientData = INT2PTR(0); + type.clientData = INT2PTR(TCL_ENCODING_CESU8); type.encodingName = "cesu-8"; Tcl_CreateEncoding(&type); @@ -2505,13 +2506,6 @@ UtfToUtfProc( * TODO - below check could be simplified to remove the MODIFIED * expression I think given the checks already made above. May be. */ -#if 0 - if ((len < 2) && (ch != 0) && (flags & TCL_ENCODING_MODIFIED) - && (profile == TCL_ENCODING_PROFILE_STRICT)) { - result = TCL_CONVERT_SYNTAX; - break; - } -#else if ((len < 2) && (ch != 0) && (flags & TCL_ENCODING_MODIFIED)) { if (PROFILE_STRICT(profile)) { result = TCL_CONVERT_SYNTAX; @@ -2520,7 +2514,7 @@ UtfToUtfProc( ch = UNICODE_REPLACE_CHAR; } } -#endif + src += len; if (!(flags & TCL_ENCODING_UTF) && (ch > 0x3FF)) { if (ch > 0xFFFF) { @@ -2551,7 +2545,7 @@ UtfToUtfProc( cesu8: *dst++ = (char) (((ch >> 12) | 0xE0) & 0xEF); *dst++ = (char) (((ch >> 6) | 0x80) & 0xBF); - *dst++ = (char) ((ch | 0x80) & 0xBF); + *dst++ = (char) ((ch | 0x80) & 0xBF); continue; } src += len; @@ -3205,7 +3199,11 @@ TableToUtfProc( if (prefixBytes[byte]) { src--; } - ch = (Tcl_UniChar)byte; + if (PROFILE_REPLACE(flags)) { + ch = UNICODE_REPLACE_CHAR; + } else { + ch = (Tcl_UniChar)byte; + } } /* diff --git a/generic/tclInt.h b/generic/tclInt.h index 4b6303d..538b177 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -2891,6 +2891,7 @@ TclEncodingProfileNameToId(Tcl_Interp *interp, MODULE_SCOPE const char *TclEncodingProfileIdToName(Tcl_Interp *interp, int profileId); MODULE_SCOPE int TclEncodingExternalFlagsToInternal(int flags); +MODULE_SCOPE void TclGetEncodingProfiles(Tcl_Interp *interp); /* * TIP #233 (Virtualized Time) diff --git a/tests/cmdAH.test b/tests/cmdAH.test index c666513..65ecac5 100644 --- a/tests/cmdAH.test +++ b/tests/cmdAH.test @@ -208,21 +208,26 @@ set encInvalidBytes { utf-8 \x41\xC0\x42 default A\u00C0B -1 C0 utf-8 \x41\xC0\x42 tcl8 A\u00C0B -1 C0 + utf-8 \x41\xC0\x42 replace A\uFFFDB -1 C0 utf-8 \x41\xC0\x42 strict A 1 C0 utf-8 \x41\x80\x42 default A\u0080B -1 80 utf-8 \x41\x80\x42 tcl8 A\u0080B -1 80 + utf-8 \x41\x80\x42 replace A\uFFFDB -1 80 utf-8 \x41\x80\x42 strict A 1 80 utf-8 \x41\xC0\x80\x42 default A\u0000B -1 C080 utf-8 \x41\xC0\x80\x42 tcl8 A\u0000B -1 C080 utf-8 \x41\xC0\x80\x42 strict A 1 C080 utf-8 \x41\xC1\x42 default A\u00C1B -1 C1 utf-8 \x41\xC1\x42 tcl8 A\u00C1B -1 C1 + utf-8 \x41\xC1\x42 replace A\uFFFDB -1 C1 utf-8 \x41\xC1\x42 strict A 1 C1 utf-8 \x41\xC2\x42 default A\u00C2B -1 C2-nontrail utf-8 \x41\xC2\x42 tcl8 A\u00C2B -1 C2-nontrail + utf-8 \x41\xC2\x42 replace A\uFFFDB -1 C2-nontrail utf-8 \x41\xC2\x42 strict A 1 C2-nontrail utf-8 \x41\xC2 default A\u00C2 -1 C2-incomplete utf-8 \x41\xC2 tcl8 A\u00C2 -1 C2-incomplete + utf-8 \x41\xC2 replace A\uFFFD -1 C2-incomplete utf-8 \x41\xC2 strict A 1 C2-incomplete utf-8 A\xed\xa0\x80B default A\uD800B -1 High-surrogate utf-8 A\xed\xa0\x80B tcl8 A\uD800B -1 High-surrogate @@ -335,7 +340,7 @@ test cmdAH-4.1.1 {encoding} -returnCodes error -body { } -result {wrong # args: should be "encoding subcommand ?arg ...?"} test cmdAH-4.1.2 {Tcl_EncodingObjCmd} -returnCodes error -body { encoding foo -} -result {unknown or ambiguous subcommand "foo": must be convertfrom, convertto, dirs, names, or system} +} -result {unknown or ambiguous subcommand "foo": must be convertfrom, convertto, dirs, names, profiles, or system} # # encoding system 4.2.* diff --git a/tests/socket.test b/tests/socket.test index a0fe2f7..b1435be 100644 --- a/tests/socket.test +++ b/tests/socket.test @@ -1071,7 +1071,7 @@ test socket_$af-7.3 {testing socket specific options} -constraints [list socket close $s update llength $l -} -result 22 +} -result 20 test socket_$af-7.4 {testing socket specific options} -constraints [list socket supported_$af] -setup { set timer [after 10000 "set x timed_out"] set l "" -- cgit v0.12 From 86d84d444cba1b00cf6b8771db83f21d9e6e5e13 Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Sun, 12 Feb 2023 17:34:58 +0000 Subject: Tentative fix for [bd1a60eb9] - surrogates in strict utf-8 --- generic/tclEncoding.c | 11 +++++++++-- tests/cmdAH.test | 5 +++-- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index fc3ac77..5d099f9 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -547,7 +547,8 @@ FillEncodingFileMap(void) * TCL_ENCODING_LE is only used for utf-16/utf-32/ucs-2. re-use the same value */ #define TCL_ENCODING_LE TCL_ENCODING_MODIFIED /* Little-endian encoding */ #define TCL_ENCODING_UTF 0x200 /* For UTF-8 encoding, allow 4-byte output sequences */ -#define TCL_ENCODING_CESU8 0x400 /* TODO - Distinguishes cesu-8 from utf-8*/ +#define TCL_ENCODING_CESU8_SOURCE 0x400 /* TODO - Distinguishes cesu-8 + * *source* from utf-8 *source* */ void TclInitEncodingSubsystem(void) @@ -593,7 +594,7 @@ TclInitEncodingSubsystem(void) type.nullSize = 1; type.clientData = INT2PTR(TCL_ENCODING_UTF); Tcl_CreateEncoding(&type); - type.clientData = INT2PTR(TCL_ENCODING_CESU8); + type.clientData = INT2PTR(TCL_ENCODING_CESU8_SOURCE); type.encodingName = "cesu-8"; Tcl_CreateEncoding(&type); @@ -2370,6 +2371,7 @@ UtfToUtfProc( const char *dstStart, *dstEnd; int result, numChars, charLimit = INT_MAX; int ch; + int isCesu8; result = TCL_OK; @@ -2531,6 +2533,11 @@ UtfToUtfProc( * A surrogate character is detected, handle especially. */ /* TODO - what about REPLACE profile? */ + if (PROFILE_STRICT(profile) && !(flags & TCL_ENCODING_CESU8_SOURCE)) { + result = TCL_CONVERT_UNKNOWN; + src = saveSrc; + break; + } low = ch; len = (src <= srcEnd-3) ? TclUtfToUCS4(src, &low) : 0; diff --git a/tests/cmdAH.test b/tests/cmdAH.test index 65ecac5..f2aab52 100644 --- a/tests/cmdAH.test +++ b/tests/cmdAH.test @@ -237,8 +237,9 @@ set encInvalidBytes { utf-8 A\xed\xb0\x80B strict A 1 Low-surrogate utf-8 \xed\xa0\x80\xed\xb0\x80 default \U00010000 -1 High-low-surrogate utf-8 \xed\xa0\x80\xed\xb0\x80 tcl8 \U00010000 -1 High-low-surrogate - utf-8 \xed\xa0\x80\xed\xb0\x80 strict \U00010000 0 High-low-surrogate - + utf-8 \xed\xa0\x80\xed\xb0\x80 strict {} 0 High-low-surrogate +} +set utf32-le-TODO { utf-32le \x00\xD8\x00\x00 default \uD800 -1 {High-surrogate} utf-32le \x00\xD8\x00\x00 tcl8 \uD800 -1 {High-surrogate} utf-32le \x00\xD8\x00\x00 strict "" 0 {High-surrogate} -- cgit v0.12 From 2974b5727951737a5b67789f4b7712cf72096ed0 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Tue, 14 Feb 2023 07:29:36 +0000 Subject: Make a start fixing [bd1a60eb9c]. WIP --- generic/tclEncoding.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 0941f14..01c4eb1 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -519,7 +519,8 @@ FillEncodingFileMap(void) /* Since TCL_ENCODING_MODIFIED is only used for utf-8/cesu-8 and * TCL_ENCODING_LE is only used for utf-16/utf-32/ucs-2. re-use the same value */ #define TCL_ENCODING_LE TCL_ENCODING_MODIFIED /* Little-endian encoding */ -#define TCL_ENCODING_UTF 0x200 /* For UTF-8 encoding, allow 4-byte output sequences */ +#define ENCODING_UTF 0x200 /* For UTF-8 encoding, allow 4-byte output sequences */ +#define ENCODING_INPUT 0x400 /* For UTF-8/CESU-8 encoding, means external -> internal */ void TclInitEncodingSubsystem(void) @@ -561,7 +562,7 @@ TclInitEncodingSubsystem(void) type.fromUtfProc = UtfToUtfProc; type.freeProc = NULL; type.nullSize = 1; - type.clientData = INT2PTR(TCL_ENCODING_UTF); + type.clientData = INT2PTR(ENCODING_UTF); Tcl_CreateEncoding(&type); type.clientData = INT2PTR(TCL_ENCODING_NOCOMPLAIN); type.encodingName = "cesu-8"; @@ -1238,7 +1239,7 @@ Tcl_ExternalToUtfDStringEx( flags |= TCL_ENCODING_START | TCL_ENCODING_END; if (encodingPtr->toUtfProc == UtfToUtfProc) { - flags |= TCL_ENCODING_MODIFIED | TCL_ENCODING_UTF; + flags |= ENCODING_INPUT; } while (1) { @@ -1355,7 +1356,7 @@ Tcl_ExternalToUtf( dstLen--; } if (encodingPtr->toUtfProc == UtfToUtfProc) { - flags |= TCL_ENCODING_MODIFIED | TCL_ENCODING_UTF; + flags |= ENCODING_INPUT; } do { Tcl_EncodingState savedState = *statePtr; @@ -1450,7 +1451,7 @@ Tcl_UtfToExternalDStringEx( const char *src, /* Source string in UTF-8. */ int srcLen, /* Source string length in bytes, or < 0 for * strlen(). */ - int flags, /* Conversion control flags. */ + int flags, /* Conversion control flags. */ Tcl_DString *dstPtr) /* Uninitialized or free DString in which the * converted string is stored. */ { @@ -2363,7 +2364,7 @@ UtfToUtfProc( dstStart = dst; flags |= PTR2INT(clientData); - dstEnd = dst + dstLen - ((flags & TCL_ENCODING_UTF) ? TCL_UTF_MAX : 6); + dstEnd = dst + dstLen - ((flags & ENCODING_UTF) ? TCL_UTF_MAX : 6); for (numChars = 0; src < srcEnd && numChars <= charLimit; numChars++) { if ((src > srcClose) && (!Tcl_UtfCharComplete(src, srcEnd - src))) { @@ -2435,7 +2436,7 @@ UtfToUtfProc( break; } src += len; - if (!(flags & TCL_ENCODING_UTF) && (ch > 0x3FF)) { + if (!(flags & ENCODING_UTF) && (ch > 0x3FF)) { if (ch > 0xFFFF) { /* CESU-8 6-byte sequence for chars > U+FFFF */ ch -= 0x10000; -- cgit v0.12 From 85320f8fd074a2a55f76a7c0a8290f0a195530dc Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Tue, 14 Feb 2023 11:37:35 +0000 Subject: Bug [bd1a60eb9c]. Eliminate TCL_ENCODING_UTF. --- generic/tclEncoding.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 5d099f9..778fca8 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -546,7 +546,6 @@ FillEncodingFileMap(void) /* Since TCL_ENCODING_MODIFIED is only used for utf-8/cesu-8 and * TCL_ENCODING_LE is only used for utf-16/utf-32/ucs-2. re-use the same value */ #define TCL_ENCODING_LE TCL_ENCODING_MODIFIED /* Little-endian encoding */ -#define TCL_ENCODING_UTF 0x200 /* For UTF-8 encoding, allow 4-byte output sequences */ #define TCL_ENCODING_CESU8_SOURCE 0x400 /* TODO - Distinguishes cesu-8 * *source* from utf-8 *source* */ @@ -592,7 +591,7 @@ TclInitEncodingSubsystem(void) type.fromUtfProc = UtfToUtfProc; type.freeProc = NULL; type.nullSize = 1; - type.clientData = INT2PTR(TCL_ENCODING_UTF); + type.clientData = INT2PTR(0); Tcl_CreateEncoding(&type); type.clientData = INT2PTR(TCL_ENCODING_CESU8_SOURCE); type.encodingName = "cesu-8"; @@ -1269,7 +1268,7 @@ Tcl_ExternalToUtfDStringEx( flags = TclEncodingExternalFlagsToInternal(flags); flags |= TCL_ENCODING_START | TCL_ENCODING_END; if (encodingPtr->toUtfProc == UtfToUtfProc) { - flags |= TCL_ENCODING_MODIFIED | TCL_ENCODING_UTF; + flags |= TCL_ENCODING_MODIFIED; } while (1) { @@ -1386,7 +1385,7 @@ Tcl_ExternalToUtf( dstLen--; } if (encodingPtr->toUtfProc == UtfToUtfProc) { - flags |= TCL_ENCODING_MODIFIED | TCL_ENCODING_UTF; + flags |= TCL_ENCODING_MODIFIED; } do { Tcl_EncodingState savedState = *statePtr; @@ -2371,7 +2370,6 @@ UtfToUtfProc( const char *dstStart, *dstEnd; int result, numChars, charLimit = INT_MAX; int ch; - int isCesu8; result = TCL_OK; @@ -2387,7 +2385,7 @@ UtfToUtfProc( dstStart = dst; flags |= PTR2INT(clientData); - dstEnd = dst + dstLen - ((flags & TCL_ENCODING_UTF) ? TCL_UTF_MAX : 6); + dstEnd = dst + dstLen - ((flags & TCL_ENCODING_CESU8_SOURCE) ? 6 : TCL_UTF_MAX); for (numChars = 0; src < srcEnd && numChars <= charLimit; numChars++) { int profile = TCL_ENCODING_PROFILE_GET(flags); @@ -2518,7 +2516,7 @@ UtfToUtfProc( } src += len; - if (!(flags & TCL_ENCODING_UTF) && (ch > 0x3FF)) { + if ((flags & TCL_ENCODING_CESU8_SOURCE) && (ch > 0x3FF)) { if (ch > 0xFFFF) { /* CESU-8 6-byte sequence for chars > U+FFFF */ ch -= 0x10000; -- cgit v0.12 From a750ed2c2475387ab61073159ebf455c2452c78e Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Tue, 14 Feb 2023 11:39:35 +0000 Subject: Fix uniqueness parsing fconfigure -encoding / -encodingprofile options --- generic/tclIO.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index 49f4257..8a6f76a 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -7994,7 +7994,7 @@ Tcl_GetChannelOption( return TCL_OK; } } - if (len == 0 || HaveOpt(2, "-encoding")) { + if (len == 0 || HaveOpt(8, "-encoding")) { if (len == 0) { Tcl_DStringAppendElement(dsPtr, "-encoding"); } @@ -8008,7 +8008,7 @@ Tcl_GetChannelOption( return TCL_OK; } } - if (len == 0 || HaveOpt(1, "-encodingprofile")) { + if (len == 0 || HaveOpt(9, "-encodingprofile")) { int profile; const char *profileName; if (len == 0) { -- cgit v0.12 From 891d60a9ad2f9600dd9b1c3f0ce966d79a8942e8 Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Tue, 14 Feb 2023 11:56:49 +0000 Subject: Remove obsolete comment --- generic/tclEncoding.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 778fca8..0f5e05f 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -570,8 +570,6 @@ TclInitEncodingSubsystem(void) Tcl_InitHashTable(&encodingTable, TCL_STRING_KEYS); Tcl_MutexUnlock(&encodingMutex); - /* TODO - why is NOCOMPLAIN being hardcoded for encodings below? */ - /* * Create a few initial encodings. UTF-8 to UTF-8 translation is not a * no-op because it turns a stream of improperly formed UTF-8 into a -- cgit v0.12 From 38df35585000fd7245c6604e845663751a7bd524 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Tue, 14 Feb 2023 20:50:26 +0000 Subject: Complete fix for [bd1a60eb9c]. Also fix a bug in the tableencoding. With testcases. --- generic/tclEncoding.c | 24 ++++++++++++++++-------- tests/encoding.test | 38 ++++++++++++++++++++++++++++++++++++-- 2 files changed, 52 insertions(+), 10 deletions(-) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 01c4eb1..c5ecc46 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -2380,7 +2380,7 @@ UtfToUtfProc( result = TCL_CONVERT_NOSPACE; break; } - if (UCHAR(*src) < 0x80 && !((UCHAR(*src) == 0) && (flags & TCL_ENCODING_MODIFIED))) { + if (UCHAR(*src) < 0x80 && !((UCHAR(*src) == 0) && (flags & ENCODING_INPUT))) { /* * Copy 7bit characters, but skip null-bytes when we are in input * mode, so that they get converted to 0xC080. @@ -2388,11 +2388,13 @@ UtfToUtfProc( *dst++ = *src++; } else if ((UCHAR(*src) == 0xC0) && (src + 1 < srcEnd) - && (UCHAR(src[1]) == 0x80) && (!(flags & TCL_ENCODING_MODIFIED) || ((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT) || (flags & ENCODING_FAILINDEX))) { + && (UCHAR(src[1]) == 0x80) && (flags & ENCODING_UTF) && (!(flags & ENCODING_INPUT) + || ((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT) + || (flags & ENCODING_FAILINDEX))) { /* * If in input mode, and -strict or -failindex is specified: This is an error. */ - if (flags & TCL_ENCODING_MODIFIED) { + if (flags & ENCODING_INPUT) { result = TCL_CONVERT_SYNTAX; break; } @@ -2410,7 +2412,7 @@ UtfToUtfProc( * unless the user has explicitly asked to be told. */ - if (flags & TCL_ENCODING_MODIFIED) { + if (flags & ENCODING_INPUT) { if ((STOPONERROR) && (flags & TCL_ENCODING_CHAR_LIMIT)) { result = TCL_CONVERT_MULTIBYTE; break; @@ -2430,7 +2432,7 @@ UtfToUtfProc( int low; const char *saveSrc = src; size_t len = TclUtfToUCS4(src, &ch); - if ((len < 2) && (ch != 0) && (flags & TCL_ENCODING_MODIFIED) + if ((len < 2) && (ch != 0) && (flags & ENCODING_INPUT) && (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT))) { result = TCL_CONVERT_SYNTAX; break; @@ -2451,6 +2453,11 @@ UtfToUtfProc( * A surrogate character is detected, handle especially. */ + if (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT) && (flags & ENCODING_UTF)) { + result = TCL_CONVERT_UNKNOWN; + src = saveSrc; + break; + } low = ch; len = (src <= srcEnd-3) ? TclUtfToUCS4(src, &low) : 0; @@ -2470,12 +2477,12 @@ UtfToUtfProc( src += len; dst += Tcl_UniCharToUtf(ch, dst); ch = low; - } else if (STOPONERROR && !(flags & TCL_ENCODING_MODIFIED) && (((ch & ~0x7FF) == 0xD800))) { + } else if (STOPONERROR && !(flags & ENCODING_INPUT) && (((ch & ~0x7FF) == 0xD800))) { result = TCL_CONVERT_UNKNOWN; src = saveSrc; break; } else if (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT) - && (flags & TCL_ENCODING_MODIFIED) && ((ch & ~0x7FF) == 0xD800)) { + && (flags & ENCODING_INPUT) && ((ch & ~0x7FF) == 0xD800)) { result = TCL_CONVERT_SYNTAX; src = saveSrc; break; @@ -3117,7 +3124,8 @@ TableToUtfProc( ch = pageZero[byte]; } if ((ch == 0) && (byte != 0)) { - if (STOPONERROR) { + if ((flags & ENCODING_FAILINDEX) + || ((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT)) { result = TCL_CONVERT_SYNTAX; break; } diff --git a/tests/encoding.test b/tests/encoding.test index b2b029e..bbb40d7 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -452,6 +452,24 @@ test encoding-15.24 {UtfToUtfProc CESU-8 bug [048dd20b4171c8da]} { binary scan $y H* z list [string length $y] $z } {2 cfbf} +test encoding-15.25 {UtfToUtfProc CESU-8} { + encoding convertfrom cesu-8 \x00 +} \x00 +test encoding-15.26 {UtfToUtfProc CESU-8} { + encoding convertfrom cesu-8 \xC0\x80 +} \x00 +test encoding-15.27 {UtfToUtfProc -strict CESU-8} { + encoding convertfrom -strict cesu-8 \xC0\x80 +} \x00 +test encoding-15.28 {UtfToUtfProc -strict CESU-8} { + encoding convertfrom -strict cesu-8 \xC0\x80 +} \x00 +test encoding-15.29 {UtfToUtfProc CESU-8} { + encoding convertto cesu-8 \x00 +} \xC0\x80 +test encoding-15.30 {UtfToUtfProc -strict CESU-8} { + encoding convertto -strict cesu-8 \x00 +} \xC0\x80 test encoding-16.1 {Utf16ToUtfProc} -body { set val [encoding convertfrom utf-16 NN] @@ -584,8 +602,21 @@ test encoding-18.6 {TableToUtfProc on invalid input with -nocomplain} -body { list [catch {encoding convertto -nocomplain jis0208 \\} res] $res } -result {0 !)} -test encoding-19.1 {TableFromUtfProc} { -} {} +test encoding-19.1 {TableFromUtfProc} -body { + encoding convertfrom ascii AÁ +} -result AÁ +test encoding-19.2 {TableFromUtfProc} -body { + encoding convertfrom -nocomplain ascii AÁ +} -result AÁ +test encoding-19.3 {TableFromUtfProc} -body { + encoding convertfrom -strict ascii AÁ +} -returnCodes 1 -result {unexpected byte sequence starting at index 1: '\xC1'} +test encoding-19.4 {TableFromUtfProc} -body { + list [encoding convertfrom -failindex idx ascii AÁ] [set idx] +} -result {A 1} +test encoding-19.4 {TableFromUtfProc} -body { + list [encoding convertfrom -failindex idx -strict ascii AÁ] [set idx] +} -result {A 1} test encoding-20.1 {TableFreefProc} { } {} @@ -804,6 +835,9 @@ test encoding-24.39 {Try to generate invalid utf-8 with -strict} -body { test encoding-24.40 {Try to generate invalid utf-8 with -nocomplain} -body { encoding convertto -nocomplain utf-8 \uD800 } -result \xED\xA0\x80 +test encoding-24.41 {Parse invalid utf-8 with -strict} -body { + encoding convertfrom -strict utf-8 \xED\xA0\x80\xED\xB0\x80 +} -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xED'} file delete [file join [temporaryDirectory] iso2022.txt] -- cgit v0.12 From 4c2d834fca441a8d463e3bd1a06489f0b864cf73 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Wed, 15 Feb 2023 07:21:55 +0000 Subject: Ticket [10c2c17c32] follow-up. One output char too much with -failindex. --- generic/tclEncoding.c | 1 + tests/encoding.test | 3 +++ 2 files changed, 4 insertions(+) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index c5ecc46..c4db314 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -2595,6 +2595,7 @@ Utf32ToUtfProc( && ((ch & ~0x7FF) == 0xD800))) { if (STOPONERROR) { result = TCL_CONVERT_SYNTAX; + ch = 0; break; } } diff --git a/tests/encoding.test b/tests/encoding.test index bbb40d7..916a84a 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -527,6 +527,9 @@ test encoding-16.15 {Utf16ToUtfProc} -body { test encoding-16.16 {Utf16ToUtfProc} -body { encoding convertfrom utf-16le \x00\xDC\x00\xD8 } -result \uDC00\uD800 +test encoding-16.17 {Utf32ToUtfProc} -body { + list [encoding convertfrom -strict -failindex idx utf-32le \x41\x00\x00\x00\x00\xD8\x00\x00\x42\x00\x00\x00] [set idx] +} -result {A 4} test encoding-16.9 { Utf16ToUtfProc, Tcl_UniCharToUtf, surrogate pairs in utf-16 -- cgit v0.12 From 96e60d29b763fa1c662fb77e731556ddfaf9c912 Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Wed, 15 Feb 2023 17:27:55 +0000 Subject: Start on expanding encoding tests --- generic/tclEncoding.c | 41 +++++------ tests/cmdAH.test | 196 +++++++++++++++++++++++++++++++++++++++++++------- 2 files changed, 190 insertions(+), 47 deletions(-) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 7886910..8cd970f 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -2409,32 +2409,29 @@ UtfToUtfProc( */ *dst++ = *src++; - } else if ((UCHAR(*src) == 0xC0) && (src + 1 < srcEnd) - && (UCHAR(src[1]) == 0x80) && (flags & ENCODING_UTF) && (!(flags & ENCODING_INPUT) - || PROFILE_STRICT(profile))) { - /* - * \xC0\x80 and either strict profile or target is "real" UTF-8 - * - Strict profile - error - * - Non-strict, real UTF-8 - output \x00 - */ - if (flags & ENCODING_INPUT) { - /* - * TODO - should above check not be against STRICT? - * That would probably break a convertto command that goes - * from the internal UTF8 to the real UTF8. On the other - * hand this means, a strict UTF8->UTF8 transform is not - * possible using this function. - */ + } + else if ((UCHAR(*src) == 0xC0) && (src + 1 < srcEnd) && + (UCHAR(src[1]) == 0x80) && (flags & ENCODING_UTF) && + (!(flags & ENCODING_INPUT) || PROFILE_STRICT(profile) || + PROFILE_REPLACE(profile))) { + /* Special sequence \xC0\x80 */ + if (PROFILE_STRICT(profile)) { result = TCL_CONVERT_SYNTAX; break; } - /* - * Convert 0xC080 to real nulls when we are in output mode, - * irrespective of the profile. - */ - *dst++ = 0; - src += 2; + if (PROFILE_REPLACE(profile)) { + dst += Tcl_UniCharToUtf(UNICODE_REPLACE_CHAR, dst); + src += 1; /* C0, 80 handled in next loop iteration + since dst limit has to be checked */ + } else { + /* + * Convert 0xC080 to real nulls when we are in output mode, + * irrespective of the profile. + */ + *dst++ = 0; + src += 2; + } } else if (!Tcl_UtfCharComplete(src, srcEnd - src)) { /* diff --git a/tests/cmdAH.test b/tests/cmdAH.test index f2aab52..6aa3c2e 100644 --- a/tests/cmdAH.test +++ b/tests/cmdAH.test @@ -184,7 +184,8 @@ set encProfiles {tcl8 strict replace} # TODO - valid sequences for different encodings - shiftjis etc. # Note utf-16, utf-32 missing because they are automatically -# generated based on le/be versions. +# generated based on le/be versions. Also add all ranges from Unicode standard +# Table 3.7 set encValidStrings { ascii ABC \x41\x42\x43 utf-8 A\u0000\u03A9\u8A9E\U00010384 \x41\x00\xCE\xA9\xE8\xAA\x9E\xF0\x90\x8E\x84 @@ -194,22 +195,106 @@ set encValidStrings { utf-32be A\u0000\u03A9\u8A9E\U00010384 \x00\x00\x00\x41\x00\x00\x00\x00\x00\x00\x03\xA9\x00\x00\x8A\x9E\x00\x01\x03\x84 } -# Invalid byte sequences {encoding bytes profile prefix failindex tag} +# Invalid byte sequences. These are driven from a table with format +# {encoding bytes profile expectedresult expectedfailindex ctrl comment} +# # Note tag is used in test id generation as well. The combination -# should be unique for test ids to be unique. -# Note utf-16, utf-32 missing because they are automatically -# generated based on le/be versions. +# should be unique for test ids to be unique. Note utf-16, +# utf-32 missing because they are automatically generated based on le/be +# versions. Each entry potentially results in generation of multiple tests. +# This is controlled by the ctrl field. This should be a list of +# zero or more of the following: +# solo - the test data is the string itself +# lead - the test data is the string followed by a valid suffix +# tail - the test data is the string preceded by a prefix +# middle - the test data is the string wrapped by a prefix and suffix +# If the ctrl field is empty it is treated as all of the above +# Note if there is any other value by itself, it will cause the test to +# be skipped. This is intentional to skip known bugs. + # TODO - other encodings and test cases + +# ascii - Any byte above 127 is invalid set encInvalidBytes { + ascii 80 default \u20AC -1 {} {map to cp1252} + ascii 80 tcl8 \u20AC -1 {} {map to cp1252} + ascii 80 replace \uFFFD -1 {} {Smallest invalid byte} + ascii 80 strict {} 0 {} {Smallest invalid byte} + + ascii 81 default \u0081 -1 {knownBug} {map to cp1252} + ascii 82 default \u201A -1 {knownBug} {map to cp1252} + ascii 83 default \u0192 -1 {knownBug} {map to cp1252} + ascii 84 default \u201E -1 {knownBug} {map to cp1252} + ascii 85 default \u2026 -1 {knownBug} {map to cp1252} + ascii 86 default \u2020 -1 {knownBug} {map to cp1252} + ascii 87 default \u2021 -1 {knownBug} {map to cp1252} + ascii 88 default \u0276 -1 {knownBug} {map to cp1252} + ascii 89 default \u2030 -1 {knownBug} {map to cp1252} + ascii 8A default \u0160 -1 {knownBug} {map to cp1252} + ascii 8B default \u2039 -1 {knownBug} {map to cp1252} + ascii 8C default \u0152 -1 {knownBug} {map to cp1252} + ascii 8D default \u008D -1 {knownBug} {map to cp1252} + ascii 8E default \u017D -1 {knownBug} {map to cp1252} + ascii 8F default \u008F -1 {knownBug} {map to cp1252} + ascii 90 default \u0090 -1 {knownBug} {map to cp1252} + ascii 91 default \u2018 -1 {knownBug} {map to cp1252} + ascii 92 default \u2019 -1 {knownBug} {map to cp1252} + ascii 93 default \u201C -1 {knownBug} {map to cp1252} + ascii 94 default \u201D -1 {knownBug} {map to cp1252} + ascii 95 default \u2022 -1 {knownBug} {map to cp1252} + ascii 96 default \u2013 -1 {knownBug} {map to cp1252} + ascii 97 default \u2014 -1 {knownBug} {map to cp1252} + ascii 98 default \u02DC -1 {knownBug} {map to cp1252} + ascii 99 default \u2122 -1 {knownBug} {map to cp1252} + ascii 9A default \u0161 -1 {knownBug} {map to cp1252} + ascii 9B default \u203A -1 {knownBug} {map to cp1252} + ascii 9C default \u0153 -1 {knownBug} {map to cp1252} + ascii 9D default \u009D -1 {knownBug} {map to cp1252} + ascii 9E default \u017E -1 {knownBug} {map to cp1252} + ascii 9F default \u0178 -1 {knownBug} {map to cp1252} + + ascii FF default \u00FF -1 {} {Largest invalid byte} + ascii FF tcl8 \u00FF -1 {} {Largest invalid byte} + ascii FF replace \uFFFD -1 {} {Largest invalid byte} + ascii FF strict {} 0 {} {Largest invalid byte} +} + +# Following invalid sequences based on Table 3.7 in the Unicode standard. +# utf-8 C0, C1, F5:FF are invalid bytes ANYWHERE. +# Exception is C080 in non-strict mode. +# +lappend encInvalidBytes {*}{ + utf-8 C0 default \u00C0 -1 {} {C0 is invalid anywhere} + utf-8 C0 tcl8 \u00C0 -1 {} {C0 is invalid anywhere} + utf-8 C0 replace \uFFFD -1 {} {C0 is invalid anywhere} + utf-8 C0 strict {} 0 {} {C0 is invalid anywhere} + + utf-8 C080 default \u0000 -1 {} {C080 -> U+0 in Tcl's internal modified UTF8} + utf-8 C080 tcl8 \u0000 -1 {} {C080 -> U+0 in Tcl's internal modified UTF8} + utf-8 C080 replace \uFFFD\uFFFD -1 C080 {} {C080 -> U+0 in Tcl's internal modified UTF8} + utf-8 C080 strict {} 0 {} {C080 -> U+0 in Tcl's internal modified UTF8} + + utf-8 C1 default \u00C1 -1 {} {C1 is invalid everywhere} + utf-8 C1 tcl8 \u00C1 -1 {} {C1 is invalid everywhere} + utf-8 C1 replace \uFFFD -1 {} {C1 is invalid everywhere} + utf-8 C1 strict {} 0 {} {C1 is invalid everywhere} + utf-8 F5 default \u00F5 -1 {} {F5:FF are invalid everywhere} + utf-8 F5 tcl8 \u00F5 -1 {} {F5:FF are invalid everywhere} + utf-8 F5 replace \uFFFD -1 {} {F5:FF are invalid everywhere} + utf-8 F5 strict {} 0 {} {F5:FF are invalid everywhere} + utf-8 FF default \u00FF -1 {} {F5:FF are invalid everywhere} + utf-8 FF tcl8 \u00FF -1 {} {F5:FF are invalid everywhere} + utf-8 FF replace \uFFFD -1 {} {F5:FF are invalid everywhere} + utf-8 FF strict {} 0 {} {F5:FF are invalid everywhere} + utf-8 F5908080 default \u00F5 -1 {knownBug} {F5:FF with trailing bytes} +} + +set xxencInvalidBytes { ascii \x41\xe9\x42 default A\u00E9B -1 {non-ASCII} ascii \x41\xe9\x42 tcl8 A\u00E9B -1 {non-ASCII} ascii \x41\xe9\x42 replace A\uFFFDB -1 {non-ASCII} ascii \x41\xe9\x42 strict A 1 {non-ASCII} - - utf-8 \x41\xC0\x42 default A\u00C0B -1 C0 - utf-8 \x41\xC0\x42 tcl8 A\u00C0B -1 C0 - utf-8 \x41\xC0\x42 replace A\uFFFDB -1 C0 - utf-8 \x41\xC0\x42 strict A 1 C0 + utf-8 \x41\x80\x42 default A\u0080B -1 80 utf-8 \x41\x80\x42 tcl8 A\u0080B -1 80 utf-8 \x41\x80\x42 replace A\uFFFDB -1 80 @@ -272,7 +357,7 @@ set encUnencodableStrings { iso8859-1 A\u0141B default \x41\x3f\x42 -1 unencodable iso8859-1 A\u0141B tcl8 \x41\x3f\x42 -1 unencodable - iso8859-1 A\u0141B strict \x41 1 unencodable + iso8859-1 A\u0141B strict \x41 0 unencodable utf-8 A\uD800B default \x41\xed\xa0\x80\x42 -1 High-surrogate utf-8 A\uD800B tcl8 \x41\xed\xa0\x80\x42 -1 High-surrogate @@ -282,12 +367,28 @@ set encUnencodableStrings { utf-8 A\uDC00B strict \x41 1 High-surrogate } + if {$::tcl_platform(byteOrder) eq "littleEndian"} { set endian le } else { set endian be } +# Maps utf-{16,32}{le,be} to utf-16, utf-32 and +# others to "". Used to test utf-16, utf-32 based +# on system endianness +proc endianUtf {enc} { + if {$::tcl_platform(byteOrder) eq "littleEndian"} { + set endian le + } else { + set endian be + } + if {$enc eq "utf-16$endian" || $enc eq "utf-32$endian"} { + return [string range $enc 0 5] + } + return "" +} + # # Check errors for invalid number of arguments proc badnumargs {id cmd cmdargs} { @@ -394,9 +495,17 @@ testconvert cmdAH-4.3.12 { # Wrapper for verifying -failindex proc testfailindex {id converter enc data result {profile default}} { if {$profile eq "default"} { - testconvert $id "list \[encoding $converter -failindex idx $enc $data] \[set idx]" $result + testconvert $id.$enc "list \[encoding $converter -failindex idx $enc $data] \[set idx]" $result + if {[set enc [endianUtf $enc]] ne ""} { + # If utf{16,32}-{le,be}, also do utf{16,32} + testconvert $id.$enc "list \[encoding $converter -failindex idx $enc $data] \[set idx]" $result + } } else { - testconvert $id "list \[encoding $converter -profile $profile -failindex idx $enc $data] \[set idx]" $result + testconvert $id.$enc "list \[encoding $converter -profile $profile -failindex idx $enc $data] \[set idx]" $result + if {[set enc [endianUtf $enc]] ne ""} { + # If utf{16,32}-{le,be}, also do utf{16,32} + testconvert $id.$enc "list \[encoding $converter -profile $profile -failindex idx $enc $data] \[set idx]" $result + } } } @@ -410,13 +519,49 @@ foreach {enc string bytes} $encValidStrings { } } -# -failindex - invalid data -foreach {enc bytes profile prefix failidx tag} $encInvalidBytes { - testfailindex cmdAH-4.3.14.$enc.$profile.$tag convertfrom $enc $bytes [list $prefix $failidx] $profile - if {"utf-16$endian" eq $enc} { - # utf-16le ->utf-16, utf-32be -> utf32 etc. - set enc [string range $enc 0 5] - testfailindex cmdAH-4.3.14.$enc.$profile.$tag convertfrom $enc $bytes [list $prefix $failidx] $profile +# -failindex - invalid data for each profile +foreach {enc hex profile str failidx ctrl comment} $encInvalidBytes { + # There are multiple test cases based on location of invalid bytes + set bytes [binary format H* $hex] + set prefix A + set suffix B + set prefixLen [string length [encoding convertto $enc $prefix]] + if {$ctrl eq {} || "solo" in $ctrl} { + testfailindex xxcmdAH-4.3.14.$profile.$hex.solo convertfrom $enc $bytes [list $str $failidx] $profile + } + if {$ctrl eq {} || "lead" in $ctrl} { + if {$failidx == -1} { + # If success expected + set result $str$suffix + } else { + # Failure expected + set result "" + } + testfailindex xxcmdAH-4.3.14.$profile.$hex.lead convertfrom $enc $bytes$suffix [list $result $failidx] $profile + } + if {$ctrl eq {} || "tail" in $ctrl} { + set expected_failidx $failidx + if {$failidx == -1} { + # If success expected + set result $prefix$str + } else { + # Failure expected + set result $prefix + incr expected_failidx [string length [encoding convertto $enc $prefix]] + } + testfailindex xxcmdAH-4.3.14.$profile.$hex.tail convertfrom $enc $prefix$bytes [list $result $expected_failidx] $profile + } + if {$ctrl eq {} || "middle" in $ctrl} { + set expected_failidx $failidx + if {$failidx == -1} { + # If success expected + set result $prefix$str$suffix + } else { + # Failure expected + set result $prefix + incr expected_failidx [string length [encoding convertto $enc $prefix]] + } + testfailindex xxcmdAH-4.3.14.$profile.$hex.middle convertfrom $enc $prefix$bytes$suffix [list $result $expected_failidx] $profile } } @@ -437,7 +582,8 @@ foreach profile $encProfiles { # Cycle through the various combinations of encodings and profiles # for invalid byte sequences -foreach {enc bytes profile prefix failidx tag} $encInvalidBytes { +foreach {enc hex profile prefix failidx ctrl comment} $encInvalidBytes { + set bytes [binary format H* $hex] if {$failidx eq -1} { set result [list $prefix] } else { @@ -447,18 +593,18 @@ foreach {enc bytes profile prefix failidx tag} $encInvalidBytes { set result [list "unexpected byte sequence starting at index $failidx: *" -returnCodes error -match glob] } if {$profile eq "default"} { - testconvert cmdAH-4.3.15.$enc.$profile.$tag [list encoding convertfrom $enc $bytes] {*}$result + testconvert cmdAH-4.3.15.$enc.$profile.$hex [list encoding convertfrom $enc $bytes] {*}$result if {"utf-16$endian" eq $enc} { # utf-16le ->utf-16, utf-32be -> utf32 etc. set enc [string range $enc 0 5] - testconvert cmdAH-4.3.15.$enc.$profile.$tag [list encoding convertfrom $enc $bytes] {*}$result + testconvert cmdAH-4.3.15.$enc.$profile.$hex [list encoding convertfrom $enc $bytes] {*}$result } } else { - testconvert cmdAH-4.3.15.$enc.$profile.$tag [list encoding convertfrom -profile $profile $enc $bytes] {*}$result + testconvert cmdAH-4.3.15.$enc.$profile.$hex [list encoding convertfrom -profile $profile $enc $bytes] {*}$result if {"utf-16$endian" eq $enc} { # utf-16le ->utf-16, utf-32be -> utf32 etc. set enc [string range $enc 0 5] - testconvert cmdAH-4.3.15.$enc.$profile.$tag [list encoding convertfrom -profile $profile $enc $bytes] {*}$result + testconvert cmdAH-4.3.15.$enc.$profile.$hex [list encoding convertfrom -profile $profile $enc $bytes] {*}$result } } } -- cgit v0.12 From f06c5e7af1c85806bcbce3202000670b90ab4528 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Wed, 15 Feb 2023 20:26:10 +0000 Subject: Fix for [33ab6d08eb]: Inconsistent behavior with encoding convertfrom -failindex --- generic/tclEncoding.c | 2 +- tests/encoding.test | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index c4db314..af7f30a 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -2433,7 +2433,7 @@ UtfToUtfProc( const char *saveSrc = src; size_t len = TclUtfToUCS4(src, &ch); if ((len < 2) && (ch != 0) && (flags & ENCODING_INPUT) - && (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT))) { + && (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT) || (flags & ENCODING_FAILINDEX))) { result = TCL_CONVERT_SYNTAX; break; } diff --git a/tests/encoding.test b/tests/encoding.test index 916a84a..6f1a760 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -617,9 +617,12 @@ test encoding-19.3 {TableFromUtfProc} -body { test encoding-19.4 {TableFromUtfProc} -body { list [encoding convertfrom -failindex idx ascii AÁ] [set idx] } -result {A 1} -test encoding-19.4 {TableFromUtfProc} -body { +test encoding-19.5 {TableFromUtfProc} -body { list [encoding convertfrom -failindex idx -strict ascii AÁ] [set idx] } -result {A 1} +test encoding-19.6 {TableFromUtfProc} -body { + list [encoding convertfrom -failindex idx -strict ascii AÁB] [set idx] +} -result {A 1} test encoding-20.1 {TableFreefProc} { } {} -- cgit v0.12 From 50538911836e76d66a3526e5fe950134cca022d8 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Thu, 16 Feb 2023 07:59:09 +0000 Subject: Try to fix [885c86a9a0]. Doesn't work completely yet. --- generic/tclEncoding.c | 8 +++----- tests/encoding.test | 8 +++++++- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index c4db314..e178f80 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -2421,12 +2421,10 @@ UtfToUtfProc( result = TCL_CONVERT_SYNTAX; break; } - ch = UCHAR(*src++); - } else { - char chbuf[2]; - chbuf[0] = UCHAR(*src++); chbuf[1] = 0; - TclUtfToUCS4(chbuf, &ch); } + char chbuf[2]; + chbuf[0] = UCHAR(*src++); chbuf[1] = 0; + Tcl_UtfToUniChar(chbuf, &ch); dst += Tcl_UniCharToUtf(ch, dst); } else { int low; diff --git a/tests/encoding.test b/tests/encoding.test index 916a84a..270c351 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -762,7 +762,7 @@ test encoding-24.14 {Parse valid or invalid utf-8} { } 1 test encoding-24.15 {Parse valid or invalid utf-8} -constraints deprecated -body { encoding convertfrom utf-8 "Z\xE0\x80" -} -result Z\xE0\x80 +} -result Z\xE0\u20AC test encoding-24.16 {Parse valid or invalid utf-8} -constraints testbytestring -body { encoding convertto utf-8 [testbytestring "Z\u4343\x80"] } -returnCodes 1 -result {expected byte sequence but character 1 was '䍃€' (U+004343)} @@ -841,6 +841,12 @@ test encoding-24.40 {Try to generate invalid utf-8 with -nocomplain} -body { test encoding-24.41 {Parse invalid utf-8 with -strict} -body { encoding convertfrom -strict utf-8 \xED\xA0\x80\xED\xB0\x80 } -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xED'} +test encoding-24.42 {Parse invalid utf-8, fallback to cp1252 [885c86a9a0]} -body { + encoding convertfrom -nocomplain utf-8 \xF0\x80\x80\x80 +} -result \xF0\u20AC\u20AC\u20AC€€ +test encoding-24.43 {Parse invalid utf-8, fallback to cp1252 [885c86a9a0]} -body { + encoding convertfrom -nocomplain utf-8 \x80 +} -result \u20AC€€ file delete [file join [temporaryDirectory] iso2022.txt] -- cgit v0.12 From 0563a789022a80cd7745d596028b570f0fb24cbb Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Thu, 16 Feb 2023 16:59:10 +0000 Subject: Fix [5e6ae6e05e]: Implement -strict correctly for cesu-8 --- generic/tclEncoding.c | 24 +++++++++++++++--------- tests/encoding.test | 13 ++++++++----- 2 files changed, 23 insertions(+), 14 deletions(-) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index c4db314..73cbc5c 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -564,7 +564,7 @@ TclInitEncodingSubsystem(void) type.nullSize = 1; type.clientData = INT2PTR(ENCODING_UTF); Tcl_CreateEncoding(&type); - type.clientData = INT2PTR(TCL_ENCODING_NOCOMPLAIN); + type.clientData = INT2PTR(0); type.encodingName = "cesu-8"; Tcl_CreateEncoding(&type); @@ -2388,13 +2388,13 @@ UtfToUtfProc( *dst++ = *src++; } else if ((UCHAR(*src) == 0xC0) && (src + 1 < srcEnd) - && (UCHAR(src[1]) == 0x80) && (flags & ENCODING_UTF) && (!(flags & ENCODING_INPUT) + && (UCHAR(src[1]) == 0x80) && !(flags & TCL_ENCODING_MODIFIED) && (!(flags & ENCODING_INPUT) || ((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT) || (flags & ENCODING_FAILINDEX))) { /* * If in input mode, and -strict or -failindex is specified: This is an error. */ - if (flags & ENCODING_INPUT) { + if ((STOPONERROR) && (flags & ENCODING_INPUT)) { result = TCL_CONVERT_SYNTAX; break; } @@ -2430,15 +2430,21 @@ UtfToUtfProc( dst += Tcl_UniCharToUtf(ch, dst); } else { int low; - const char *saveSrc = src; size_t len = TclUtfToUCS4(src, &ch); - if ((len < 2) && (ch != 0) && (flags & ENCODING_INPUT) - && (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT))) { - result = TCL_CONVERT_SYNTAX; - break; + if (flags & ENCODING_INPUT) { + if ((len < 2) && (ch != 0) + && ((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT)) { + result = TCL_CONVERT_SYNTAX; + break; + } else if ((ch > 0xFFFF) && !(flags & ENCODING_UTF) + && ((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT)) { + result = TCL_CONVERT_SYNTAX; + break; + } } + const char *saveSrc = src; src += len; - if (!(flags & ENCODING_UTF) && (ch > 0x3FF)) { + if (!(flags & ENCODING_UTF) && !(flags & ENCODING_INPUT) && (ch > 0x3FF)) { if (ch > 0xFFFF) { /* CESU-8 6-byte sequence for chars > U+FFFF */ ch -= 0x10000; diff --git a/tests/encoding.test b/tests/encoding.test index 916a84a..34dfafb 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -459,17 +459,20 @@ test encoding-15.26 {UtfToUtfProc CESU-8} { encoding convertfrom cesu-8 \xC0\x80 } \x00 test encoding-15.27 {UtfToUtfProc -strict CESU-8} { - encoding convertfrom -strict cesu-8 \xC0\x80 + encoding convertfrom -strict cesu-8 \x00 } \x00 -test encoding-15.28 {UtfToUtfProc -strict CESU-8} { +test encoding-15.28 {UtfToUtfProc -strict CESU-8} -body { encoding convertfrom -strict cesu-8 \xC0\x80 -} \x00 +} -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xC0'} test encoding-15.29 {UtfToUtfProc CESU-8} { encoding convertto cesu-8 \x00 -} \xC0\x80 +} \x00 test encoding-15.30 {UtfToUtfProc -strict CESU-8} { encoding convertto -strict cesu-8 \x00 -} \xC0\x80 +} \x00 +test encoding-15.31 {UtfToUtfProc -strict CESU-8 (bytes F0-F4 are invalid)} -body { + encoding convertfrom -strict cesu-8 \xF1\x86\x83\x9C +} -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xF1'} test encoding-16.1 {Utf16ToUtfProc} -body { set val [encoding convertfrom utf-16 NN] -- cgit v0.12 From 684cbb8f5cc3ed03b9349b0d322b04f1c87cc86a Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Thu, 16 Feb 2023 17:15:35 +0000 Subject: Bit more work on encoding test framework. Long way to go. --- generic/tclEncoding.c | 65 ++++---- tests/cmdAH.test | 427 ++++++++++++++++++++++++++++++++++---------------- 2 files changed, 324 insertions(+), 168 deletions(-) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 8cd970f..470f8f3 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -2368,6 +2368,7 @@ UtfToUtfProc( const char *dstStart, *dstEnd; int result, numChars, charLimit = INT_MAX; int ch; + int profile; result = TCL_OK; @@ -2385,8 +2386,8 @@ UtfToUtfProc( flags |= PTR2INT(clientData); dstEnd = dst + dstLen - ((flags & ENCODING_UTF) ? TCL_UTF_MAX : 6); + profile = TCL_ENCODING_PROFILE_GET(flags); for (numChars = 0; src < srcEnd && numChars <= charLimit; numChars++) { - int profile = TCL_ENCODING_PROFILE_GET(flags); if ((src > srcClose) && (!Tcl_UtfCharComplete(src, srcEnd - src))) { /* @@ -2415,15 +2416,15 @@ UtfToUtfProc( (!(flags & ENCODING_INPUT) || PROFILE_STRICT(profile) || PROFILE_REPLACE(profile))) { /* Special sequence \xC0\x80 */ - if (PROFILE_STRICT(profile)) { - result = TCL_CONVERT_SYNTAX; - break; - } - - if (PROFILE_REPLACE(profile)) { - dst += Tcl_UniCharToUtf(UNICODE_REPLACE_CHAR, dst); - src += 1; /* C0, 80 handled in next loop iteration - since dst limit has to be checked */ + if (flags & ENCODING_INPUT) { + if (PROFILE_REPLACE(profile)) { + dst += Tcl_UniCharToUtf(UNICODE_REPLACE_CHAR, dst); + src += 2; + } else { + /* PROFILE_STRICT */ + result = TCL_CONVERT_SYNTAX; + break; + } } else { /* * Convert 0xC080 to real nulls when we are in output mode, @@ -2432,6 +2433,7 @@ UtfToUtfProc( *dst++ = 0; src += 2; } + } else if (!Tcl_UtfCharComplete(src, srcEnd - src)) { /* @@ -2516,32 +2518,37 @@ UtfToUtfProc( /* * A surrogate character is detected, handle especially. */ - /* TODO - what about REPLACE profile? */ if (PROFILE_STRICT(profile) && (flags & ENCODING_UTF)) { result = TCL_CONVERT_UNKNOWN; src = saveSrc; break; } - - low = ch; - len = (src <= srcEnd-3) ? TclUtfToUCS4(src, &low) : 0; - - if ((!LOW_SURROGATE(low)) || (ch & 0x400)) { - - if (PROFILE_STRICT(profile)) { - result = TCL_CONVERT_UNKNOWN; - src = saveSrc; - break; + if (0 && PROFILE_REPLACE(profile)) { + ch = UNICODE_REPLACE_CHAR; + src += len; + // dst += Tcl_UniCharToUtf(ch, dst); + } + else { + low = ch; + len = (src <= srcEnd - 3) ? TclUtfToUCS4(src, &low) : 0; + + if ((!LOW_SURROGATE(low)) || (ch & 0x400)) { + + if (PROFILE_STRICT(profile)) { + result = TCL_CONVERT_UNKNOWN; + src = saveSrc; + break; + } +cesu8: + *dst++ = (char)(((ch >> 12) | 0xE0) & 0xEF); + *dst++ = (char)(((ch >> 6) | 0x80) & 0xBF); + *dst++ = (char)((ch | 0x80) & 0xBF); + continue; } - cesu8: - *dst++ = (char) (((ch >> 12) | 0xE0) & 0xEF); - *dst++ = (char) (((ch >> 6) | 0x80) & 0xBF); - *dst++ = (char) ((ch | 0x80) & 0xBF); - continue; + src += len; + dst += Tcl_UniCharToUtf(ch, dst); + ch = low; } - src += len; - dst += Tcl_UniCharToUtf(ch, dst); - ch = low; } else if (PROFILE_STRICT(profile) && (!(flags & ENCODING_INPUT)) && SURROGATE(ch)) { diff --git a/tests/cmdAH.test b/tests/cmdAH.test index 6aa3c2e..6386658 100644 --- a/tests/cmdAH.test +++ b/tests/cmdAH.test @@ -187,19 +187,18 @@ set encProfiles {tcl8 strict replace} # generated based on le/be versions. Also add all ranges from Unicode standard # Table 3.7 set encValidStrings { - ascii ABC \x41\x42\x43 - utf-8 A\u0000\u03A9\u8A9E\U00010384 \x41\x00\xCE\xA9\xE8\xAA\x9E\xF0\x90\x8E\x84 - utf-16le A\u0000\u03A9\u8A9E\U00010384 \x41\x00\x00\x00\xA9\x03\x9E\x8A\x00\xD8\x84\xDF - utf-16be A\u0000\u03A9\u8A9E\U00010384 \x00\x41\x00\x00\x03\xA9\x8A\x9E\xD8\x00\xDF\x84 - utf-32le A\u0000\u03A9\u8A9E\U00010384 \x41\x00\x00\x00\x00\x00\x00\x00\xA9\x03\x00\x00\x9E\x8A\x00\x00\x84\x03\x01\x00 - utf-32be A\u0000\u03A9\u8A9E\U00010384 \x00\x00\x00\x41\x00\x00\x00\x00\x00\x00\x03\xA9\x00\x00\x8A\x9E\x00\x01\x03\x84 + ascii ABC 414243 + utf-8 A\u0000\u03A9\u8A9E\U00010384 4100CEA9E8AA9EF0908E84 + utf-16le A\u0000\u03A9\u8A9E\U00010384 41000000A9039E8A00D884DF + utf-16be A\u0000\u03A9\u8A9E\U00010384 0041000003A98A9ED800DF84 + utf-32le A\u0000\u03A9\u8A9E\U00010384 4100000000000000A90300009E8A000084030100 + utf-32be A\u0000\u03A9\u8A9E\U00010384 0000004100000000000003A900008A9E00010384 } # Invalid byte sequences. These are driven from a table with format # {encoding bytes profile expectedresult expectedfailindex ctrl comment} # -# Note tag is used in test id generation as well. The combination -# should be unique for test ids to be unique. Note utf-16, +# should be unique for test ids to be unique. Note utf-16, # utf-32 missing because they are automatically generated based on le/be # versions. Each entry potentially results in generation of multiple tests. # This is controlled by the ctrl field. This should be a list of @@ -214,13 +213,15 @@ set encValidStrings { # TODO - other encodings and test cases -# ascii - Any byte above 127 is invalid -set encInvalidBytes { - ascii 80 default \u20AC -1 {} {map to cp1252} - ascii 80 tcl8 \u20AC -1 {} {map to cp1252} +# ascii - Any byte above 127 is invalid and is mapped +# to the same numeric code point except for the range +# 80-9F which is treated as cp1252. +# This tests the TableToUtfProc code path. +lappend encInvalidBytes {*}{ + ascii 80 default \u20AC -1 {knownBug} {map to cp1252} + ascii 80 tcl8 \u20AC -1 {knownBug} {map to cp1252} ascii 80 replace \uFFFD -1 {} {Smallest invalid byte} ascii 80 strict {} 0 {} {Smallest invalid byte} - ascii 81 default \u0081 -1 {knownBug} {map to cp1252} ascii 82 default \u201A -1 {knownBug} {map to cp1252} ascii 83 default \u0192 -1 {knownBug} {map to cp1252} @@ -259,25 +260,80 @@ set encInvalidBytes { ascii FF strict {} 0 {} {Largest invalid byte} } -# Following invalid sequences based on Table 3.7 in the Unicode standard. -# utf-8 C0, C1, F5:FF are invalid bytes ANYWHERE. -# Exception is C080 in non-strict mode. -# +# utf-8 - valid sequences based on Table 3.7 in the Unicode +# standard. +# +# Code Points First Second Third Fourth Byte +# U+0000..U+007F 00..7F +# U+0080..U+07FF C2..DF 80..BF +# U+0800..U+0FFF E0 A0..BF 80..BF +# U+1000..U+CFFF E1..EC 80..BF 80..BF +# U+D000..U+D7FF ED 80..9F 80..BF +# U+E000..U+FFFF EE..EF 80..BF 80..BF +# U+10000..U+3FFFF F0 90..BF 80..BF 80..BF +# U+40000..U+FFFFF F1..F3 80..BF 80..BF 80..BF +# U+100000..U+10FFFF F4 80..8F 80..BF 80..BF +# +# Tests below are based on the "gaps" in the above table. Note ascii test +# values are repeated because internally a different code path is used +# (UtfToUtfProc). +# Note C0, C1, F5:FF are invalid bytes ANYWHERE. Exception is C080 lappend encInvalidBytes {*}{ + utf-8 80 default \u20AC -1 {knownBug} {map to cp1252} + utf-8 80 tcl8 \u20AC -1 {knownBug} {map to cp1252} + utf-8 80 replace \uFFFD -1 {} {Smallest invalid byte} + utf-8 80 strict {} 0 {} {Smallest invalid byte} + utf-8 81 default \u0081 -1 {knownBug} {map to cp1252} + utf-8 82 default \u201A -1 {knownBug} {map to cp1252} + utf-8 83 default \u0192 -1 {knownBug} {map to cp1252} + utf-8 84 default \u201E -1 {knownBug} {map to cp1252} + utf-8 85 default \u2026 -1 {knownBug} {map to cp1252} + utf-8 86 default \u2020 -1 {knownBug} {map to cp1252} + utf-8 87 default \u2021 -1 {knownBug} {map to cp1252} + utf-8 88 default \u0276 -1 {knownBug} {map to cp1252} + utf-8 89 default \u2030 -1 {knownBug} {map to cp1252} + utf-8 8A default \u0160 -1 {knownBug} {map to cp1252} + utf-8 8B default \u2039 -1 {knownBug} {map to cp1252} + utf-8 8C default \u0152 -1 {knownBug} {map to cp1252} + utf-8 8D default \u008D -1 {knownBug} {map to cp1252} + utf-8 8E default \u017D -1 {knownBug} {map to cp1252} + utf-8 8F default \u008F -1 {knownBug} {map to cp1252} + utf-8 90 default \u0090 -1 {knownBug} {map to cp1252} + utf-8 91 default \u2018 -1 {knownBug} {map to cp1252} + utf-8 92 default \u2019 -1 {knownBug} {map to cp1252} + utf-8 93 default \u201C -1 {knownBug} {map to cp1252} + utf-8 94 default \u201D -1 {knownBug} {map to cp1252} + utf-8 95 default \u2022 -1 {knownBug} {map to cp1252} + utf-8 96 default \u2013 -1 {knownBug} {map to cp1252} + utf-8 97 default \u2014 -1 {knownBug} {map to cp1252} + utf-8 98 default \u02DC -1 {knownBug} {map to cp1252} + utf-8 99 default \u2122 -1 {knownBug} {map to cp1252} + utf-8 9A default \u0161 -1 {knownBug} {map to cp1252} + utf-8 9B default \u203A -1 {knownBug} {map to cp1252} + utf-8 9C default \u0153 -1 {knownBug} {map to cp1252} + utf-8 9D default \u009D -1 {knownBug} {map to cp1252} + utf-8 9E default \u017E -1 {knownBug} {map to cp1252} + utf-8 9F default \u0178 -1 {knownBug} {map to cp1252} + utf-8 C0 default \u00C0 -1 {} {C0 is invalid anywhere} utf-8 C0 tcl8 \u00C0 -1 {} {C0 is invalid anywhere} - utf-8 C0 replace \uFFFD -1 {} {C0 is invalid anywhere} utf-8 C0 strict {} 0 {} {C0 is invalid anywhere} - + utf-8 C0 replace \uFFFD -1 {} {C0 is invalid anywhere} utf-8 C080 default \u0000 -1 {} {C080 -> U+0 in Tcl's internal modified UTF8} utf-8 C080 tcl8 \u0000 -1 {} {C080 -> U+0 in Tcl's internal modified UTF8} - utf-8 C080 replace \uFFFD\uFFFD -1 C080 {} {C080 -> U+0 in Tcl's internal modified UTF8} - utf-8 C080 strict {} 0 {} {C080 -> U+0 in Tcl's internal modified UTF8} - + utf-8 C080 strict {} 0 {} {C080 -> invalid} + utf-8 C080 replace \uFFFD -1 {} {C080 -> single replacement char} utf-8 C1 default \u00C1 -1 {} {C1 is invalid everywhere} utf-8 C1 tcl8 \u00C1 -1 {} {C1 is invalid everywhere} utf-8 C1 replace \uFFFD -1 {} {C1 is invalid everywhere} utf-8 C1 strict {} 0 {} {C1 is invalid everywhere} + + utf-8 C1 default \u00C1 -1 {} {Require valid trail byte} + utf-8 C1 tcl8 \u00C1 -1 {} {Require valid trail byte} + utf-8 C1 replace \uFFFD -1 {} {Require valid trail byte} + utf-8 C1 strict {} 0 {} {Require valid trail byte} + + utf-8 F5 default \u00F5 -1 {} {F5:FF are invalid everywhere} utf-8 F5 tcl8 \u00F5 -1 {} {F5:FF are invalid everywhere} utf-8 F5 replace \uFFFD -1 {} {F5:FF are invalid everywhere} @@ -286,14 +342,14 @@ lappend encInvalidBytes {*}{ utf-8 FF tcl8 \u00FF -1 {} {F5:FF are invalid everywhere} utf-8 FF replace \uFFFD -1 {} {F5:FF are invalid everywhere} utf-8 FF strict {} 0 {} {F5:FF are invalid everywhere} - utf-8 F5908080 default \u00F5 -1 {knownBug} {F5:FF with trailing bytes} + + utf-8 C0AFE080BFF0818130 replace \uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\x30 -1 {} {Unicode Table 3-8} + utf-8 EDA080EDBFBFEDAF30 replace \uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\x30 -1 {knownBug} {Unicode Table 3-9} + utf-8 F4919293FF4180BF30 replace \uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\u0041\uFFFD\uFFFD\x30 -1 {} {Unicode Table 3-10} + utf-8 E180E2F09192F1BF30 replace \uFFFD\uFFFD\uFFFD\uFFFD\x30 -1 {knownBug} {Unicode Table 3.11} } set xxencInvalidBytes { - ascii \x41\xe9\x42 default A\u00E9B -1 {non-ASCII} - ascii \x41\xe9\x42 tcl8 A\u00E9B -1 {non-ASCII} - ascii \x41\xe9\x42 replace A\uFFFDB -1 {non-ASCII} - ascii \x41\xe9\x42 strict A 1 {non-ASCII} utf-8 \x41\x80\x42 default A\u0080B -1 80 utf-8 \x41\x80\x42 tcl8 A\u0080B -1 80 @@ -343,31 +399,39 @@ set utf32-le-TODO { } # Strings that cannot be encoded for specific encoding / profiles -# {encoding string profile bytes failindex tag} -# Note tag is used in test id generation as well. The combination -# should be unique for test ids to be unique. +# {encoding string profile exptedresult expectedfailindex ctrl comment} +# should be unique for test ids to be unique. # Note utf-16, utf-32 missing because they are automatically # generated based on le/be versions. +# Each entry potentially results in generation of multiple tests. +# This is controlled by the ctrl field. This should be a list of +# zero or more of the following: +# solo - the test data is the string itself +# lead - the test data is the string followed by a valid suffix +# tail - the test data is the string preceded by a prefix +# middle - the test data is the string wrapped by a prefix and suffix +# If the ctrl field is empty it is treated as all of the above +# Note if there is any other value by itself, it will cause the test to +# be skipped. This is intentional to skip known bugs. # TODO - other encodings and test cases # TODO - out of range code point (note cannot be generated by \U notation) set encUnencodableStrings { - ascii A\u00e0B default \x41\x3f\x42 -1 non-ASCII - ascii A\u00e0B tcl8 \x41\x3f\x42 -1 non-ASCII - ascii A\u00e0B strict \x41 1 non-ASCII - - iso8859-1 A\u0141B default \x41\x3f\x42 -1 unencodable - iso8859-1 A\u0141B tcl8 \x41\x3f\x42 -1 unencodable - iso8859-1 A\u0141B strict \x41 0 unencodable - - utf-8 A\uD800B default \x41\xed\xa0\x80\x42 -1 High-surrogate - utf-8 A\uD800B tcl8 \x41\xed\xa0\x80\x42 -1 High-surrogate - utf-8 A\uD800B strict \x41 1 High-surrogate - utf-8 A\uDC00B default \x41\xed\xb0\x80\x42 -1 High-surrogate - utf-8 A\uDC00B tcl8 \x41\xed\xb0\x80\x42 -1 High-surrogate - utf-8 A\uDC00B strict \x41 1 High-surrogate + ascii \u00e0 default 3f -1 {} {unencodable} + ascii \u00e0 tcl8 3f -1 {} {unencodable} + ascii \u00e0 strict {} 0 {} {unencodable} + + iso8859-1 \u0141 default 3f -1 {} unencodable + iso8859-1 \u0141 tcl8 3f -1 {} unencodable + iso8859-1 \u0141 strict {} 0 {} unencodable + + utf-8 \uD800 default eda080 -1 {} High-surrogate + utf-8 \uD800 tcl8 eda080 -1 {} High-surrogate + utf-8 \uD800 strict {} 0 {} High-surrogate + utf-8 \uDC00 default edb080 -1 {} High-surrogate + utf-8 \uDC00 tcl8 edb080 -1 {} High-surrogate + utf-8 \uDC00 strict {} 0 {} High-surrogate } - if {$::tcl_platform(byteOrder) eq "littleEndian"} { set endian le } else { @@ -437,6 +501,40 @@ proc testconvert {id body result args} { {*}$args } +proc testprofile {id converter enc profile data result args} { + if {$profile eq "default"} { + testconvert $id.$enc.$profile [list encoding $converter $enc $data] $result {*}$args + if {[set enc [endianUtf $enc]] ne ""} { + # If utf{16,32}-{le,be}, also do utf{16,32} + testconvert $id.$enc.$profile [list encoding $converter $enc $data] $result {*}$args + } + } else { + testconvert $id.$enc.$profile [list encoding $converter -profile $profile $enc $data] $result {*}$args + if {[set enc [endianUtf $enc]] ne ""} { + # If utf{16,32}-{le,be}, also do utf{16,32} + testconvert $id.$enc.$profile [list encoding $converter -profile $profile $enc $data] $result {*}$args + } + } +} + + +# Wrapper for verifying -failindex +proc testfailindex {id converter enc data result {profile default}} { + if {$profile eq "default"} { + testconvert $id.$enc.$profile "list \[encoding $converter -failindex idx $enc $data] \[set idx]" $result + if {[set enc [endianUtf $enc]] ne ""} { + # If utf{16,32}-{le,be}, also do utf{16,32} + testconvert $id.$enc.$profile "list \[encoding $converter -failindex idx $enc $data] \[set idx]" $result + } + } else { + testconvert $id.$enc.$profile "list \[encoding $converter -profile $profile -failindex idx $enc $data] \[set idx]" $result + if {[set enc [endianUtf $enc]] ne ""} { + # If utf{16,32}-{le,be}, also do utf{16,32} + testconvert $id.$enc.$profile "list \[encoding $converter -profile $profile -failindex idx $enc $data] \[set idx]" $result + } + } +} + test cmdAH-4.1.1 {encoding} -returnCodes error -body { encoding } -result {wrong # args: should be "encoding subcommand ?arg ...?"} @@ -492,42 +590,110 @@ testconvert cmdAH-4.3.12 { encoding system $system } -# Wrapper for verifying -failindex -proc testfailindex {id converter enc data result {profile default}} { - if {$profile eq "default"} { - testconvert $id.$enc "list \[encoding $converter -failindex idx $enc $data] \[set idx]" $result - if {[set enc [endianUtf $enc]] ne ""} { - # If utf{16,32}-{le,be}, also do utf{16,32} - testconvert $id.$enc "list \[encoding $converter -failindex idx $enc $data] \[set idx]" $result +# convertfrom, convertfrom -profile + +# convertfrom ?-profile? : All valid byte sequences should be accepted by all profiles +foreach {enc str hex} $encValidStrings { + set bytes [binary decode hex $hex] + set prefix A + set suffix B + set prefix_bytes [encoding convertto $enc A] + set suffix_bytes [encoding convertto $enc B] + foreach profile $encProfiles { + testfailindex cmdAH-4.3.13.$hex.solo convertfrom $enc $bytes [list $str -1] $profile + testfailindex cmdAH-4.3.13.$hex.lead convertfrom $enc $bytes$suffix_bytes [list $str$suffix -1] $profile + testfailindex cmdAH-4.3.13.$hex.tail convertfrom $enc $prefix_bytes$bytes [list $prefix$str -1] $profile + testfailindex cmdAH-4.3.13.$hex.middle convertfrom $enc $prefix_bytes$bytes$suffix_bytes [list $prefix$str$suffix -1] $profile + } +} + +# convertfrom ?-profile? : invalid byte sequences +foreach {enc hex profile str failidx ctrl comment} $encInvalidBytes { + set bytes [binary format H* $hex] + set prefix A + set suffix B + set prefixLen [string length [encoding convertto $enc $prefix]] + set result [list $str] + # TODO - if the bad byte is unprintable, tcltest errors out when printing a mismatch + # so glob it out in error message pattern for now. + set errorWithoutPrefix [list "unexpected byte sequence starting at index $failidx: *" -returnCodes error -match glob] + set errorWithPrefix [list "unexpected byte sequence starting at index [expr {$failidx+$prefixLen}]: *" -returnCodes error -match glob] + if {$ctrl eq {} || "solo" in $ctrl} { + if {$failidx == -1} { + set result [list $str] + } else { + set result $errorWithoutPrefix } - } else { - testconvert $id.$enc "list \[encoding $converter -profile $profile -failindex idx $enc $data] \[set idx]" $result - if {[set enc [endianUtf $enc]] ne ""} { - # If utf{16,32}-{le,be}, also do utf{16,32} - testconvert $id.$enc "list \[encoding $converter -profile $profile -failindex idx $enc $data] \[set idx]" $result + testprofile cmdAH-4.3.15.$hex.solo convertfrom $enc $profile $bytes {*}$result + } + if {$ctrl eq {} || "lead" in $ctrl} { + if {$failidx == -1} { + set result [list $str$suffix] + } else { + set result $errorWithoutPrefix + } + testprofile cmdAH-4.3.15.$hex.lead convertfrom $enc $profile $bytes$suffix {*}$result + } + if {$ctrl eq {} || "tail" in $ctrl} { + if {$failidx == -1} { + set result [list $prefix$str] + } else { + set result $errorWithPrefix + } + testprofile cmdAH-4.3.15.$hex.tail convertfrom $enc $profile $prefix$bytes {*}$result + } + if {$ctrl eq {} || "middle" in $ctrl} { + if {$failidx == -1} { + set result [list $prefix$str$suffix] + } else { + set result $errorWithPrefix } + testprofile cmdAH-4.3.15.$hex.middle convertfrom $enc $profile $prefix$bytes$suffix {*}$result } } -# -failindex - valid data -foreach {enc string bytes} $encValidStrings { - testfailindex cmdAH-4.3.13.$enc convertfrom $enc $bytes [list $string -1] - if {"utf-16$endian" eq $enc} { - # utf-16le ->utf-16, utf-32be -> utf32 etc. - set enc [string range $enc 0 5] - testfailindex cmdAH-4.3.13.$enc convertfrom $enc $bytes [list $string -1] +proc printable {s} { + set print "" + foreach c [split $s ""] { + set i [scan $c %c] + if {[string is print $c] && ($i <= 127)} { + append print $c + } elseif {$i <= 0xff} { + append print \\x[format %02X $i] + } elseif {$i <= 0xffff} { + append print \\u[format %04X $i] + } else { + append print \\U[format %08X $i] + } } + return $print } -# -failindex - invalid data for each profile +# convertfrom -failindex - valid data +foreach {enc str hex} $encValidStrings { + set bytes [binary decode hex $hex] + set prefix A + set suffix B + set prefix_bytes [encoding convertto $enc A] + set suffix_bytes [encoding convertto $enc B] + foreach profile $encProfiles { + testfailindex cmdAH-4.3.13.$hex.solo convertfrom $enc $bytes [list $str -1] $profile + testfailindex cmdAH-4.3.13.$hex.lead convertfrom $enc $bytes$suffix_bytes [list $str$suffix -1] $profile + testfailindex cmdAH-4.3.13.$hex.tail convertfrom $enc $prefix_bytes$bytes [list $prefix$str -1] $profile + testfailindex cmdAH-4.3.13.$hex.middle convertfrom $enc $prefix_bytes$bytes$suffix_bytes [list $prefix$str$suffix -1] $profile + } +} + + +# convertfrom -failindex, convertfrom -failindex -profile, invalid data foreach {enc hex profile str failidx ctrl comment} $encInvalidBytes { # There are multiple test cases based on location of invalid bytes - set bytes [binary format H* $hex] + set bytes [binary decode hex $hex] set prefix A set suffix B set prefixLen [string length [encoding convertto $enc $prefix]] if {$ctrl eq {} || "solo" in $ctrl} { - testfailindex xxcmdAH-4.3.14.$profile.$hex.solo convertfrom $enc $bytes [list $str $failidx] $profile + testfailindex cmdAH-4.3.14.$hex.solo convertfrom $enc $bytes [list $str $failidx] $profile } if {$ctrl eq {} || "lead" in $ctrl} { if {$failidx == -1} { @@ -537,7 +703,7 @@ foreach {enc hex profile str failidx ctrl comment} $encInvalidBytes { # Failure expected set result "" } - testfailindex xxcmdAH-4.3.14.$profile.$hex.lead convertfrom $enc $bytes$suffix [list $result $failidx] $profile + testfailindex cmdAH-4.3.14.$hex.lead convertfrom $enc $bytes$suffix [list $result $failidx] $profile } if {$ctrl eq {} || "tail" in $ctrl} { set expected_failidx $failidx @@ -547,9 +713,9 @@ foreach {enc hex profile str failidx ctrl comment} $encInvalidBytes { } else { # Failure expected set result $prefix - incr expected_failidx [string length [encoding convertto $enc $prefix]] + incr expected_failidx $prefixLen } - testfailindex xxcmdAH-4.3.14.$profile.$hex.tail convertfrom $enc $prefix$bytes [list $result $expected_failidx] $profile + testfailindex cmdAH-4.3.14.$hex.tail convertfrom $enc $prefix$bytes [list $result $expected_failidx] $profile } if {$ctrl eq {} || "middle" in $ctrl} { set expected_failidx $failidx @@ -559,53 +725,9 @@ foreach {enc hex profile str failidx ctrl comment} $encInvalidBytes { } else { # Failure expected set result $prefix - incr expected_failidx [string length [encoding convertto $enc $prefix]] - } - testfailindex xxcmdAH-4.3.14.$profile.$hex.middle convertfrom $enc $prefix$bytes$suffix [list $result $expected_failidx] $profile - } -} - -# -profile - -# All valid byte sequences should be accepted by all profiles -foreach profile $encProfiles { - set i 0 - foreach {enc string bytes} $encValidStrings { - testconvert cmdAH-4.3.15.$enc.$profile.[incr i] [list encoding convertfrom $enc $bytes] $string - if {"utf-16$endian" eq $enc} { - # utf-16le ->utf-16, utf-32be -> utf32 etc. - set enc [string range $enc 0 5] - testconvert cmdAH-4.3.15.$enc.$profile.[incr i] [list encoding convertfrom $enc $bytes] $string - } - } -} - -# Cycle through the various combinations of encodings and profiles -# for invalid byte sequences -foreach {enc hex profile prefix failidx ctrl comment} $encInvalidBytes { - set bytes [binary format H* $hex] - if {$failidx eq -1} { - set result [list $prefix] - } else { - set badbyte "'\\x[string toupper [binary encode hex [string index $bytes $failidx]]]'" - # TODO - if the bad byte is unprintable, tcltest errors out when printing a mismatch - # so glob it out for now. - set result [list "unexpected byte sequence starting at index $failidx: *" -returnCodes error -match glob] - } - if {$profile eq "default"} { - testconvert cmdAH-4.3.15.$enc.$profile.$hex [list encoding convertfrom $enc $bytes] {*}$result - if {"utf-16$endian" eq $enc} { - # utf-16le ->utf-16, utf-32be -> utf32 etc. - set enc [string range $enc 0 5] - testconvert cmdAH-4.3.15.$enc.$profile.$hex [list encoding convertfrom $enc $bytes] {*}$result - } - } else { - testconvert cmdAH-4.3.15.$enc.$profile.$hex [list encoding convertfrom -profile $profile $enc $bytes] {*}$result - if {"utf-16$endian" eq $enc} { - # utf-16le ->utf-16, utf-32be -> utf32 etc. - set enc [string range $enc 0 5] - testconvert cmdAH-4.3.15.$enc.$profile.$hex [list encoding convertfrom -profile $profile $enc $bytes] {*}$result + incr expected_failidx $prefixLen } + testfailindex cmdAH-4.3.14.$hex.middle convertfrom $enc $prefix$bytes$suffix [list $result $expected_failidx] $profile } } @@ -646,41 +768,67 @@ testconvert cmdAH-4.4.12 { # -failindex - valid data foreach {enc string bytes} $encValidStrings { testfailindex cmdAH-4.4.13.$enc convertto $enc $string [list $bytes -1] - if {"utf-16$endian" eq $enc} { - # utf-16le ->utf-16, utf-32be -> utf32 etc. - set enc [string range $enc 0 5] - testfailindex cmdAH-4.4.13.$enc convertto $enc $string [list $bytes -1] - } } # -failindex - invalid data -foreach {enc string profile bytes failidx tag} $encUnencodableStrings { - testfailindex cmdAH-4.4.14.$enc.$profile.$tag convertto $enc $string [list $bytes $failidx] $profile - if {"utf-16$endian" eq $enc} { - # utf-16le ->utf-16, utf-32be -> utf32 etc. - set enc [string range $enc 0 5] - testfailindex cmdAH-4.4.14.$enc.$profile.$tag convertto $enc $string [list $bytes $failidx] $profile +foreach {enc string profile hex failidx ctrl comment} $encUnencodableStrings { + set bytes [binary decode hex $hex] + set prefix A + set suffix B + set prefixLen [string length [encoding convertto $enc $prefix]] + if {$ctrl eq {} || "solo" in $ctrl} { + testfailindex cmdAH-4.4.14.$string.solo convertto $enc $string [list $bytes $failidx] $profile + } + if {$ctrl eq {} || "lead" in $ctrl} { + if {$failidx == -1} { + # If success expected + set result $bytes$suffix + } else { + # Failure expected + set result "" + } + testfailindex cmdAH-4.4.14.$string.lead convertto $enc $string$suffix [list $result $failidx] $profile + } + if {$ctrl eq {} || "tail" in $ctrl} { + set expected_failidx $failidx + if {$failidx == -1} { + # If success expected + set result $prefix$bytes + } else { + # Failure expected + set result $prefix + incr expected_failidx $prefixLen + } + testfailindex cmdAH-4.4.14.$string.tail convertto $enc $prefix$string [list $result $expected_failidx] $profile + } + if {$ctrl eq {} || "middle" in $ctrl} { + set expected_failidx $failidx + if {$failidx == -1} { + # If success expected + set result $prefix$bytes$suffix + } else { + # Failure expected + set result $prefix + incr expected_failidx $prefixLen + } + testfailindex cmdAH-4.4.14.$string.middle convertto $enc $prefix$string$suffix [list $result $expected_failidx] $profile } } -# -profile +# convertto -profile # All valid byte sequences should be accepted by all profiles foreach profile $encProfiles { set i 0 foreach {enc string bytes} $encValidStrings { - testconvert cmdAH-4.4.15.$enc.$profile.[incr i] [list encoding convertto $enc $string] $bytes - if {"utf-16$endian" eq $enc} { - # utf-16le ->utf-16, utf-32be -> utf32 etc. - set enc [string range $enc 0 5] - testconvert cmdAH-4.4.15.$enc.$profile.[incr i] [list encoding convertto $enc $string] $bytes - } + testprofile cmdAH-4.4.15 convertto $enc $profile $string $bytes } } # Cycle through the various combinations of encodings and profiles # for invalid byte sequences -foreach {enc string profile bytes failidx tag} $encUnencodableStrings { +foreach {enc string profile hex failidx ctrl comment} $encUnencodableStrings { + set bytes [binary decode hex $hex] if {$failidx eq -1} { set result [list $bytes] } else { @@ -688,19 +836,20 @@ foreach {enc string profile bytes failidx tag} $encUnencodableStrings { # so glob it out for now. set result [list "unexpected character at index $failidx: *" -returnCodes error -match glob] } + #testprofile xx convertto $enc $profile $string {*}$result if {$profile eq "default"} { - testconvert cmdAH-4.4.15.$enc.$profile.$tag [list encoding convertto $enc $string] {*}$result + # testconvert cmdAH-4.4.15.$enc.$profile.$tag [list encoding convertto $enc $string] {*}$result if {"utf-16$endian" eq $enc} { # utf-16le ->utf-16, utf-32be -> utf32 etc. set enc [string range $enc 0 5] - testconvert cmdAH-4.4.15.$enc.$profile.$tag [list encoding convertto $enc $string] {*}$result + # xxtestconvert cmdAH-4.4.15.$enc.$profile.$tag [list encoding convertto $enc $string] {*}$result } } else { - testconvert cmdAH-4.4.15.$enc.$profile.$tag [list encoding convertto -profile $profile $enc $string] {*}$result + # testconvert cmdAH-4.4.15.$enc.$profile.$tag [list encoding convertto -profile $profile $enc $string] {*}$result if {"utf-16$endian" eq $enc} { # utf-16le ->utf-16, utf-32be -> utf32 etc. set enc [string range $enc 0 5] - testconvert cmdAH-4.4.15.$enc.$profile.$tag [list encoding convertto -profile $profile $enc $string] {*}$result + # testconvert cmdAH-4.4.15.$enc.$profile.$tag [list encoding convertto -profile $profile $enc $string] {*}$result } } } -- cgit v0.12 From 6d674a96a1b99426cabf17e5b52272399c73e8bc Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Thu, 16 Feb 2023 21:29:07 +0000 Subject: Final part of [10c2c17c32]: UTF-LE32 encoder mapping of surrogates. Problem was in testcase, not in actual code --- tests/encoding.test | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/encoding.test b/tests/encoding.test index ed41937..a46fa5f 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -525,9 +525,9 @@ test encoding-16.13 {Utf16ToUtfProc} -body { test encoding-16.14 {Utf16ToUtfProc} -body { encoding convertfrom utf-16le \x00\xDC } -result \uDC00 -test encoding-16.15 {Utf16ToUtfProc} -constraints knownBug -body { +test encoding-16.15 {Utf16ToUtfProc} -body { encoding convertfrom utf-16le \x00\xD8\x00\xDC -} -result \uD800\uDC00 +} -result \U010000 test encoding-16.16 {Utf16ToUtfProc} -body { encoding convertfrom utf-16le \x00\xDC\x00\xD8 } -result \uDC00\uD800 -- cgit v0.12 From 45796af99db14504cedf31f0336e108930482ebf Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Thu, 16 Feb 2023 21:50:53 +0000 Subject: complete fix --- generic/tclEncoding.c | 10 +++++----- tests/encoding.test | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index fe78e03..1d3a3eb 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -2417,14 +2417,14 @@ UtfToUtfProc( result = TCL_CONVERT_MULTIBYTE; break; } - if (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT) || (flags & ENCODING_FAILINDEX)) { - result = TCL_CONVERT_SYNTAX; - break; - } + if (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT) || (flags & ENCODING_FAILINDEX)) { + result = TCL_CONVERT_SYNTAX; + break; + } } char chbuf[2]; chbuf[0] = UCHAR(*src++); chbuf[1] = 0; - Tcl_UtfToUniChar(chbuf, &ch); + TclUtfToUCS4(chbuf, &ch); dst += Tcl_UniCharToUtf(ch, dst); } else { int low; diff --git a/tests/encoding.test b/tests/encoding.test index 1b41925..03f0273 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -849,10 +849,10 @@ test encoding-24.41 {Parse invalid utf-8 with -strict} -body { } -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xED'} test encoding-24.42 {Parse invalid utf-8, fallback to cp1252 [885c86a9a0]} -body { encoding convertfrom -nocomplain utf-8 \xF0\x80\x80\x80 -} -result \xF0\u20AC\u20AC\u20AC€€ +} -result \xF0\u20AC\u20AC\u20AC test encoding-24.43 {Parse invalid utf-8, fallback to cp1252 [885c86a9a0]} -body { encoding convertfrom -nocomplain utf-8 \x80 -} -result \u20AC€€ +} -result \u20AC file delete [file join [temporaryDirectory] iso2022.txt] -- cgit v0.12 From fdbb12eced9b528f6246424cf0916b620f1783bc Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Fri, 17 Feb 2023 18:59:28 +0000 Subject: Part way through utf-8 test equivalence classes --- generic/tclEncoding.c | 4 +- library/tcltest/tcltest.tcl | 37 +++- tests/cmdAH.test | 503 +++++++++++++++++++++++++++----------------- 3 files changed, 342 insertions(+), 202 deletions(-) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index a11e696..4d5743c 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -2525,10 +2525,8 @@ UtfToUtfProc( src = saveSrc; break; } - if (0 && PROFILE_REPLACE(profile)) { + if (PROFILE_REPLACE(profile)) { ch = UNICODE_REPLACE_CHAR; - src += len; - // dst += Tcl_UniCharToUtf(ch, dst); } else { low = ch; diff --git a/library/tcltest/tcltest.tcl b/library/tcltest/tcltest.tcl index 94010a7..9ca7b09 100644 --- a/library/tcltest/tcltest.tcl +++ b/library/tcltest/tcltest.tcl @@ -1134,6 +1134,39 @@ proc tcltest::SafeFetch {n1 n2 op} { } } + +# tcltest::Asciify -- +# +# Transforms the passed string to contain only printable ascii characters. +# Useful for printing to terminals. Non-printables are mapped to +# \x, \u or \U sequences. +# +# Arguments: +# s - string to transform +# +# Results: +# The transformed strings +# +# Side effects: +# None. + +proc tcltest::Asciify {s} { + set print "" + foreach c [split $s ""] { + set i [scan $c %c] + if {[string is print $c] && ($i <= 127)} { + append print $c + } elseif {$i <= 0xff} { + append print \\x[format %02X $i] + } elseif {$i <= 0xffff} { + append print \\u[format %04X $i] + } else { + append print \\U[format %08X $i] + } + } + return $print +} + # tcltest::ConstraintInitializer -- # # Get or set a script that when evaluated in the tcltest namespace @@ -2222,12 +2255,12 @@ proc tcltest::test {name description args} { puts [outputChannel] "---- Error testing result: $scriptMatch" } else { try { - puts [outputChannel] "---- Result was:\n$actualAnswer" + puts [outputChannel] "---- Result was:\n[Asciify $actualAnswer]" } on error {errMsg errCode} { puts [outputChannel] "---- Result was:\n" } puts [outputChannel] "---- Result should have been\ - ($match matching):\n$result" + ($match matching):\n[Asciify $result]" } } if {$errorCodeFailure} { diff --git a/tests/cmdAH.test b/tests/cmdAH.test index 6386658..df28b2e 100644 --- a/tests/cmdAH.test +++ b/tests/cmdAH.test @@ -181,6 +181,7 @@ set "numargErrors(encoding names)" {wrong # args: should be "encoding names"} set "numargErrors(encoding profiles)" {wrong # args: should be "encoding profiles"} set encProfiles {tcl8 strict replace} +set encDefaultProfile tcl8; # Should reflect the default from implementation # TODO - valid sequences for different encodings - shiftjis etc. # Note utf-16, utf-32 missing because they are automatically @@ -218,43 +219,41 @@ set encValidStrings { # 80-9F which is treated as cp1252. # This tests the TableToUtfProc code path. lappend encInvalidBytes {*}{ - ascii 80 default \u20AC -1 {knownBug} {map to cp1252} ascii 80 tcl8 \u20AC -1 {knownBug} {map to cp1252} ascii 80 replace \uFFFD -1 {} {Smallest invalid byte} ascii 80 strict {} 0 {} {Smallest invalid byte} - ascii 81 default \u0081 -1 {knownBug} {map to cp1252} - ascii 82 default \u201A -1 {knownBug} {map to cp1252} - ascii 83 default \u0192 -1 {knownBug} {map to cp1252} - ascii 84 default \u201E -1 {knownBug} {map to cp1252} - ascii 85 default \u2026 -1 {knownBug} {map to cp1252} - ascii 86 default \u2020 -1 {knownBug} {map to cp1252} - ascii 87 default \u2021 -1 {knownBug} {map to cp1252} - ascii 88 default \u0276 -1 {knownBug} {map to cp1252} - ascii 89 default \u2030 -1 {knownBug} {map to cp1252} - ascii 8A default \u0160 -1 {knownBug} {map to cp1252} - ascii 8B default \u2039 -1 {knownBug} {map to cp1252} - ascii 8C default \u0152 -1 {knownBug} {map to cp1252} - ascii 8D default \u008D -1 {knownBug} {map to cp1252} - ascii 8E default \u017D -1 {knownBug} {map to cp1252} - ascii 8F default \u008F -1 {knownBug} {map to cp1252} - ascii 90 default \u0090 -1 {knownBug} {map to cp1252} - ascii 91 default \u2018 -1 {knownBug} {map to cp1252} - ascii 92 default \u2019 -1 {knownBug} {map to cp1252} - ascii 93 default \u201C -1 {knownBug} {map to cp1252} - ascii 94 default \u201D -1 {knownBug} {map to cp1252} - ascii 95 default \u2022 -1 {knownBug} {map to cp1252} - ascii 96 default \u2013 -1 {knownBug} {map to cp1252} - ascii 97 default \u2014 -1 {knownBug} {map to cp1252} - ascii 98 default \u02DC -1 {knownBug} {map to cp1252} - ascii 99 default \u2122 -1 {knownBug} {map to cp1252} - ascii 9A default \u0161 -1 {knownBug} {map to cp1252} - ascii 9B default \u203A -1 {knownBug} {map to cp1252} - ascii 9C default \u0153 -1 {knownBug} {map to cp1252} - ascii 9D default \u009D -1 {knownBug} {map to cp1252} - ascii 9E default \u017E -1 {knownBug} {map to cp1252} - ascii 9F default \u0178 -1 {knownBug} {map to cp1252} - - ascii FF default \u00FF -1 {} {Largest invalid byte} + ascii 81 tcl8 \u0081 -1 {knownBug} {map to cp1252} + ascii 82 tcl8 \u201A -1 {knownBug} {map to cp1252} + ascii 83 tcl8 \u0192 -1 {knownBug} {map to cp1252} + ascii 84 tcl8 \u201E -1 {knownBug} {map to cp1252} + ascii 85 tcl8 \u2026 -1 {knownBug} {map to cp1252} + ascii 86 tcl8 \u2020 -1 {knownBug} {map to cp1252} + ascii 87 tcl8 \u2021 -1 {knownBug} {map to cp1252} + ascii 88 tcl8 \u0276 -1 {knownBug} {map to cp1252} + ascii 89 tcl8 \u2030 -1 {knownBug} {map to cp1252} + ascii 8A tcl8 \u0160 -1 {knownBug} {map to cp1252} + ascii 8B tcl8 \u2039 -1 {knownBug} {map to cp1252} + ascii 8C tcl8 \u0152 -1 {knownBug} {map to cp1252} + ascii 8D tcl8 \u008D -1 {knownBug} {map to cp1252} + ascii 8E tcl8 \u017D -1 {knownBug} {map to cp1252} + ascii 8F tcl8 \u008F -1 {knownBug} {map to cp1252} + ascii 90 tcl8 \u0090 -1 {knownBug} {map to cp1252} + ascii 91 tcl8 \u2018 -1 {knownBug} {map to cp1252} + ascii 92 tcl8 \u2019 -1 {knownBug} {map to cp1252} + ascii 93 tcl8 \u201C -1 {knownBug} {map to cp1252} + ascii 94 tcl8 \u201D -1 {knownBug} {map to cp1252} + ascii 95 tcl8 \u2022 -1 {knownBug} {map to cp1252} + ascii 96 tcl8 \u2013 -1 {knownBug} {map to cp1252} + ascii 97 tcl8 \u2014 -1 {knownBug} {map to cp1252} + ascii 98 tcl8 \u02DC -1 {knownBug} {map to cp1252} + ascii 99 tcl8 \u2122 -1 {knownBug} {map to cp1252} + ascii 9A tcl8 \u0161 -1 {knownBug} {map to cp1252} + ascii 9B tcl8 \u203A -1 {knownBug} {map to cp1252} + ascii 9C tcl8 \u0153 -1 {knownBug} {map to cp1252} + ascii 9D tcl8 \u009D -1 {knownBug} {map to cp1252} + ascii 9E tcl8 \u017E -1 {knownBug} {map to cp1252} + ascii 9F tcl8 \u0178 -1 {knownBug} {map to cp1252} + ascii FF tcl8 \u00FF -1 {} {Largest invalid byte} ascii FF replace \uFFFD -1 {} {Largest invalid byte} ascii FF strict {} 0 {} {Largest invalid byte} @@ -279,121 +278,188 @@ lappend encInvalidBytes {*}{ # (UtfToUtfProc). # Note C0, C1, F5:FF are invalid bytes ANYWHERE. Exception is C080 lappend encInvalidBytes {*}{ - utf-8 80 default \u20AC -1 {knownBug} {map to cp1252} + utf-8 80 tcl8 \u20AC -1 {knownBug} {map to cp1252} utf-8 80 tcl8 \u20AC -1 {knownBug} {map to cp1252} utf-8 80 replace \uFFFD -1 {} {Smallest invalid byte} utf-8 80 strict {} 0 {} {Smallest invalid byte} - utf-8 81 default \u0081 -1 {knownBug} {map to cp1252} - utf-8 82 default \u201A -1 {knownBug} {map to cp1252} - utf-8 83 default \u0192 -1 {knownBug} {map to cp1252} - utf-8 84 default \u201E -1 {knownBug} {map to cp1252} - utf-8 85 default \u2026 -1 {knownBug} {map to cp1252} - utf-8 86 default \u2020 -1 {knownBug} {map to cp1252} - utf-8 87 default \u2021 -1 {knownBug} {map to cp1252} - utf-8 88 default \u0276 -1 {knownBug} {map to cp1252} - utf-8 89 default \u2030 -1 {knownBug} {map to cp1252} - utf-8 8A default \u0160 -1 {knownBug} {map to cp1252} - utf-8 8B default \u2039 -1 {knownBug} {map to cp1252} - utf-8 8C default \u0152 -1 {knownBug} {map to cp1252} - utf-8 8D default \u008D -1 {knownBug} {map to cp1252} - utf-8 8E default \u017D -1 {knownBug} {map to cp1252} - utf-8 8F default \u008F -1 {knownBug} {map to cp1252} - utf-8 90 default \u0090 -1 {knownBug} {map to cp1252} - utf-8 91 default \u2018 -1 {knownBug} {map to cp1252} - utf-8 92 default \u2019 -1 {knownBug} {map to cp1252} - utf-8 93 default \u201C -1 {knownBug} {map to cp1252} - utf-8 94 default \u201D -1 {knownBug} {map to cp1252} - utf-8 95 default \u2022 -1 {knownBug} {map to cp1252} - utf-8 96 default \u2013 -1 {knownBug} {map to cp1252} - utf-8 97 default \u2014 -1 {knownBug} {map to cp1252} - utf-8 98 default \u02DC -1 {knownBug} {map to cp1252} - utf-8 99 default \u2122 -1 {knownBug} {map to cp1252} - utf-8 9A default \u0161 -1 {knownBug} {map to cp1252} - utf-8 9B default \u203A -1 {knownBug} {map to cp1252} - utf-8 9C default \u0153 -1 {knownBug} {map to cp1252} - utf-8 9D default \u009D -1 {knownBug} {map to cp1252} - utf-8 9E default \u017E -1 {knownBug} {map to cp1252} - utf-8 9F default \u0178 -1 {knownBug} {map to cp1252} - - utf-8 C0 default \u00C0 -1 {} {C0 is invalid anywhere} + utf-8 81 tcl8 \u0081 -1 {knownBug} {map to cp1252} + utf-8 82 tcl8 \u201A -1 {knownBug} {map to cp1252} + utf-8 83 tcl8 \u0192 -1 {knownBug} {map to cp1252} + utf-8 84 tcl8 \u201E -1 {knownBug} {map to cp1252} + utf-8 85 tcl8 \u2026 -1 {knownBug} {map to cp1252} + utf-8 86 tcl8 \u2020 -1 {knownBug} {map to cp1252} + utf-8 87 tcl8 \u2021 -1 {knownBug} {map to cp1252} + utf-8 88 tcl8 \u0276 -1 {knownBug} {map to cp1252} + utf-8 89 tcl8 \u2030 -1 {knownBug} {map to cp1252} + utf-8 8A tcl8 \u0160 -1 {knownBug} {map to cp1252} + utf-8 8B tcl8 \u2039 -1 {knownBug} {map to cp1252} + utf-8 8C tcl8 \u0152 -1 {knownBug} {map to cp1252} + utf-8 8D tcl8 \u008D -1 {knownBug} {map to cp1252} + utf-8 8E tcl8 \u017D -1 {knownBug} {map to cp1252} + utf-8 8F tcl8 \u008F -1 {knownBug} {map to cp1252} + utf-8 90 tcl8 \u0090 -1 {knownBug} {map to cp1252} + utf-8 91 tcl8 \u2018 -1 {knownBug} {map to cp1252} + utf-8 92 tcl8 \u2019 -1 {knownBug} {map to cp1252} + utf-8 93 tcl8 \u201C -1 {knownBug} {map to cp1252} + utf-8 94 tcl8 \u201D -1 {knownBug} {map to cp1252} + utf-8 95 tcl8 \u2022 -1 {knownBug} {map to cp1252} + utf-8 96 tcl8 \u2013 -1 {knownBug} {map to cp1252} + utf-8 97 tcl8 \u2014 -1 {knownBug} {map to cp1252} + utf-8 98 tcl8 \u02DC -1 {knownBug} {map to cp1252} + utf-8 99 tcl8 \u2122 -1 {knownBug} {map to cp1252} + utf-8 9A tcl8 \u0161 -1 {knownBug} {map to cp1252} + utf-8 9B tcl8 \u203A -1 {knownBug} {map to cp1252} + utf-8 9C tcl8 \u0153 -1 {knownBug} {map to cp1252} + utf-8 9D tcl8 \u009D -1 {knownBug} {map to cp1252} + utf-8 9E tcl8 \u017E -1 {knownBug} {map to cp1252} + utf-8 9F tcl8 \u0178 -1 {knownBug} {map to cp1252} + utf-8 C0 tcl8 \u00C0 -1 {} {C0 is invalid anywhere} utf-8 C0 strict {} 0 {} {C0 is invalid anywhere} utf-8 C0 replace \uFFFD -1 {} {C0 is invalid anywhere} - utf-8 C080 default \u0000 -1 {} {C080 -> U+0 in Tcl's internal modified UTF8} utf-8 C080 tcl8 \u0000 -1 {} {C080 -> U+0 in Tcl's internal modified UTF8} utf-8 C080 strict {} 0 {} {C080 -> invalid} utf-8 C080 replace \uFFFD -1 {} {C080 -> single replacement char} - utf-8 C1 default \u00C1 -1 {} {C1 is invalid everywhere} utf-8 C1 tcl8 \u00C1 -1 {} {C1 is invalid everywhere} utf-8 C1 replace \uFFFD -1 {} {C1 is invalid everywhere} utf-8 C1 strict {} 0 {} {C1 is invalid everywhere} - utf-8 C1 default \u00C1 -1 {} {Require valid trail byte} - utf-8 C1 tcl8 \u00C1 -1 {} {Require valid trail byte} - utf-8 C1 replace \uFFFD -1 {} {Require valid trail byte} - utf-8 C1 strict {} 0 {} {Require valid trail byte} - + utf-8 C2 tcl8 \u00C2 -1 {} {Missing trail byte} + utf-8 C2 replace \uFFFD -1 {} {Missing trail byte} + utf-8 C2 strict {} 0 {} {Missing trail byte} + utf-8 C27F tcl8 \u00C2\x7F -1 {} {Trail byte must be 80:BF} + utf-8 C27F replace \uFFFD\x7F -1 {} {Trail byte must be 80:BF} + utf-8 C27F strict {} 0 {} {Trail byte must be 80:BF} + utf-8 DF tcl8 \u00DF -1 {} {Missing trail byte} + utf-8 DF replace \uFFFD -1 {} {Missing trail byte} + utf-8 DF strict {} 0 {} {Missing trail byte} + utf-8 DF7F tcl8 \u00DF\x7F -1 {} {Trail byte must be 80:BF} + utf-8 DF7F replace \uFFFD\x7F -1 {} {Trail byte must be 80:BF} + utf-8 DF7F strict {} 0 {} {Trail byte must be 80:BF} + utf-8 DFE0A080 tcl8 \u00DF\u0800 -1 {} {Invalid trail byte is start of valid sequence} + utf-8 DFE0A080 replace \uFFFD\u0800 -1 {} {Invalid trail byte is start of valid sequence} + utf-8 DFE0A080 strict {} 0 {} {Invalid trail byte is start of valid sequence} + + utf-8 E0 tcl8 \u00E0 -1 {} {Missing trail byte} + utf-8 E0 replace \uFFFD -1 {} {Missing trail byte} + utf-8 E0 strict {} 0 {} {Missing trail byte} + utf-8 E080 tcl8 \u00E0\u20AC -1 {knownBug} {First trail byte must be A0:BF} + utf-8 E080 replace \uFFFD\uFFFD -1 {} {First trail byte must be A0:BF} + utf-8 E080 strict {} 0 {} {First trail byte must be A0:BF} + utf-8 E09F tcl8 \u00E0\u0178 -1 {knownBug} {First trail byte must be A0:BF} + utf-8 E09F replace \uFFFD\uFFFD -1 {} {First trail byte must be A0:BF} + utf-8 E09F strict {} 0 {} {First trail byte must be A0:BF} + utf-8 E0A07F tcl8 \u00E0\u00A0\x7F -1 {} {Second trail byte must be 80:BF} + utf-8 E0A07F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} + utf-8 E0A07F strict {} 0 {} {Second trail byte must be 80:BF} + utf-8 E0BF7F tcl8 \u00E0\u00BF\x7F -1 {} {Second trail byte must be 80:BF} + utf-8 E0BF7F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} + utf-8 E0BF7F strict {} 0 {} {Second trail byte must be 80:BF} + + utf-8 E1 tcl8 \u00E1 -1 {} {Missing trail byte} + utf-8 E1 replace \uFFFD -1 {} {Missing trail byte} + utf-8 E1 strict {} 0 {} {Missing trail byte} + utf-8 E17F tcl8 \u00E1\x7F -1 {} {Trail byte must be 80:BF} + utf-8 E17F replace \uFFFD\x7F -1 {} {Trail byte must be 80:BF} + utf-8 E17F strict {} 0 {} {Trail byte must be 80:BF} + utf-8 E1807F tcl8 \u00E1\u20AC\x7F -1 {knownBug} {Second trail byte must be 80:BF} + utf-8 E1807F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} + utf-8 E1807F strict {} 0 {} {Second trail byte must be 80:BF} + utf-8 E1BF7F tcl8 \u00E1\u00BF\x7F -1 {} {Second trail byte must be 80:BF} + utf-8 E1BF7F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} + utf-8 E1BF7F strict {} 0 {} {Second trail byte must be 80:BF} + utf-8 EC tcl8 \u00EC -1 {} {Missing trail byte} + utf-8 EC replace \uFFFD -1 {} {Missing trail byte} + utf-8 EC strict {} 0 {} {Missing trail byte} + utf-8 EC7F tcl8 \u00EC\x7F -1 {} {Trail byte must be 80:BF} + utf-8 EC7F replace \uFFFD\x7F -1 {} {Trail byte must be 80:BF} + utf-8 EC7F strict {} 0 {} {Trail byte must be 80:BF} + utf-8 EC807F tcl8 \u00EC\u20AC\x7F -1 {knownBug} {Second trail byte must be 80:BF} + utf-8 EC807F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} + utf-8 EC807F strict {} 0 {} {Second trail byte must be 80:BF} + utf-8 ECBF7F tcl8 \u00EC\u00BF\x7F -1 {} {Second trail byte must be 80:BF} + utf-8 ECBF7F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} + utf-8 ECBF7F strict {} 0 {} {Second trail byte must be 80:BF} + + utf-8 ED tcl8 \u00ED -1 {} {Missing trail byte} + utf-8 ED replace \uFFFD -1 {} {Missing trail byte} + utf-8 ED strict {} 0 {} {Missing trail byte} + utf-8 ED7F tcl8 \u00ED\u7F -1 {knownBug} {First trail byte must be 80:9F} + utf-8 ED7F replace \uFFFD\u7F -1 {} {First trail byte must be 80:9F} + utf-8 ED7F strict {} 0 {} {First trail byte must be 80:9F} + utf-8 EDA0 tcl8 \u00ED\u00A0 -1 {knownBug} {First trail byte must be 80:9F} + utf-8 EDA0 replace \uFFFD\uFFFD -1 {} {First trail byte must be 80:9F} + utf-8 EDA0 strict {} 0 {} {First trail byte must be 80:9F} + utf-8 ED807F tcl8 \u00ED\u20AC\x7F -1 {knownBug} {Second trail byte must be 80:BF} + utf-8 ED807F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} + utf-8 ED807F strict {} 0 {} {Second trail byte must be 80:BF} + utf-8 ED9F7F tcl8 \u00ED\u0178\x7F -1 {knownBug} {Second trail byte must be 80:BF} + utf-8 ED9F7F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} + utf-8 ED9F7F strict {} 0 {} {Second trail byte must be 80:BF} + utf-8 EDA080 tcl8 \uD800 -1 {} {High surrogate} + utf-8 EDA080 replace \uFFFD -1 {} {High surrogate} + utf-8 EDA080 strict {} 0 {} {High surrogate} + utf-8 EDAFBF tcl8 \uDBFF -1 {} {High surrogate} + utf-8 EDAFBF replace \uFFFD -1 {} {High surrogate} + utf-8 EDAFBF strict {} 0 {} {High surrogate} + utf-8 EDB080 tcl8 \uDC00 -1 {} {Low surrogate} + utf-8 EDB080 replace \uFFFD -1 {} {Low surrogate} + utf-8 EDB080 strict {} 0 {} {Low surrogate} + utf-8 EDBFBF tcl8 \uDFFF -1 {} {Low surrogate} + utf-8 EDBFBF replace \uFFFD -1 {} {Low surrogate} + utf-8 EDBFBF strict {} 0 {} {Low surrogate} + utf-8 EDA080EDB080 tcl8 \U00010000 -1 {} {High low surrogate pair} + utf-8 EDA080EDB080 replace \uFFFD\uFFFD -1 {} {High low surrogate pair} + utf-8 EDA080EDB080 strict {} 0 {} {High low surrogate pair} + utf-8 EDAFBFEDBFBF tcl8 \U0010FFFF -1 {} {High low surrogate pair} + utf-8 EDAFBFEDBFBF replace \uFFFD\uFFFD -1 {} {High low surrogate pair} + utf-8 EDAFBFEDBFBF strict {} 0 {} {High low surrogate pair} - utf-8 F5 default \u00F5 -1 {} {F5:FF are invalid everywhere} utf-8 F5 tcl8 \u00F5 -1 {} {F5:FF are invalid everywhere} utf-8 F5 replace \uFFFD -1 {} {F5:FF are invalid everywhere} utf-8 F5 strict {} 0 {} {F5:FF are invalid everywhere} - utf-8 FF default \u00FF -1 {} {F5:FF are invalid everywhere} utf-8 FF tcl8 \u00FF -1 {} {F5:FF are invalid everywhere} utf-8 FF replace \uFFFD -1 {} {F5:FF are invalid everywhere} utf-8 FF strict {} 0 {} {F5:FF are invalid everywhere} utf-8 C0AFE080BFF0818130 replace \uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\x30 -1 {} {Unicode Table 3-8} - utf-8 EDA080EDBFBFEDAF30 replace \uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\x30 -1 {knownBug} {Unicode Table 3-9} + utf-8 EDA080EDBFBFEDAF30 replace \uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\x30 -1 {knownW3C} {Unicode Table 3-9} utf-8 F4919293FF4180BF30 replace \uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\u0041\uFFFD\uFFFD\x30 -1 {} {Unicode Table 3-10} - utf-8 E180E2F09192F1BF30 replace \uFFFD\uFFFD\uFFFD\uFFFD\x30 -1 {knownBug} {Unicode Table 3.11} + utf-8 E180E2F09192F1BF30 replace \uFFFD\uFFFD\uFFFD\uFFFD\x30 -1 {knownW3C} {Unicode Table 3.11} } set xxencInvalidBytes { - utf-8 \x41\x80\x42 default A\u0080B -1 80 utf-8 \x41\x80\x42 tcl8 A\u0080B -1 80 utf-8 \x41\x80\x42 replace A\uFFFDB -1 80 utf-8 \x41\x80\x42 strict A 1 80 - utf-8 \x41\xC0\x80\x42 default A\u0000B -1 C080 utf-8 \x41\xC0\x80\x42 tcl8 A\u0000B -1 C080 utf-8 \x41\xC0\x80\x42 strict A 1 C080 - utf-8 \x41\xC1\x42 default A\u00C1B -1 C1 utf-8 \x41\xC1\x42 tcl8 A\u00C1B -1 C1 utf-8 \x41\xC1\x42 replace A\uFFFDB -1 C1 utf-8 \x41\xC1\x42 strict A 1 C1 - utf-8 \x41\xC2\x42 default A\u00C2B -1 C2-nontrail utf-8 \x41\xC2\x42 tcl8 A\u00C2B -1 C2-nontrail utf-8 \x41\xC2\x42 replace A\uFFFDB -1 C2-nontrail utf-8 \x41\xC2\x42 strict A 1 C2-nontrail - utf-8 \x41\xC2 default A\u00C2 -1 C2-incomplete utf-8 \x41\xC2 tcl8 A\u00C2 -1 C2-incomplete utf-8 \x41\xC2 replace A\uFFFD -1 C2-incomplete utf-8 \x41\xC2 strict A 1 C2-incomplete - utf-8 A\xed\xa0\x80B default A\uD800B -1 High-surrogate utf-8 A\xed\xa0\x80B tcl8 A\uD800B -1 High-surrogate utf-8 A\xed\xa0\x80B strict A 1 High-surrogate - utf-8 A\xed\xb0\x80B default A\uDC00B -1 Low-surrogate utf-8 A\xed\xb0\x80B tcl8 A\uDC00B -1 Low-surrogate utf-8 A\xed\xb0\x80B strict A 1 Low-surrogate - utf-8 \xed\xa0\x80\xed\xb0\x80 default \U00010000 -1 High-low-surrogate utf-8 \xed\xa0\x80\xed\xb0\x80 tcl8 \U00010000 -1 High-low-surrogate utf-8 \xed\xa0\x80\xed\xb0\x80 strict {} 0 High-low-surrogate } set utf32-le-TODO { - utf-32le \x00\xD8\x00\x00 default \uD800 -1 {High-surrogate} utf-32le \x00\xD8\x00\x00 tcl8 \uD800 -1 {High-surrogate} utf-32le \x00\xD8\x00\x00 strict "" 0 {High-surrogate} - utf-32le \x00\xDC\x00\x00 default \uDC00 -1 {Low-surrogate} utf-32le \x00\xDC\x00\x00 tcl8 \uDC00 -1 {Low-surrogate} utf-32le \x00\xDC\x00\x00 strict "" 0 {Low-surrogate} - utf-32le \x00\xD8\x00\x00\x00\xDC\x00\x00 default \uD800\uDC00 -1 {High-low-surrogate} utf-32le \x00\xD8\x00\x00\x00\xDC\x00\x00 tcl8 \uD800\uDC00 -1 {High-low-surrogate} utf-32le \x00\xD8\x00\x00\x00\xDC\x00\x00 strict "" 0 {High-low-surrogate} - utf-32le \x00\xDC\x00\x00\x00\xD8\x00\x00 default \uDC00\uD800 -1 {High-low-surrogate} utf-32le \x00\xDC\x00\x00\x00\xD8\x00\x00 tcl8 \uDC00\uD800 -1 {High-low-surrogate} utf-32le \x00\xDC\x00\x00\x00\xD8\x00\x00 strict "" 0 {High-low-surrogate} - utf-32le \x41\x00\x00\x00\x00\xD8\x00\x00\x42\x00\x00\x00 default A\uD800B -1 {High-surrogate-middle} utf-32le \x41\x00\x00\x00\x00\xD8\x00\x00\x42\x00\x00\x00 tcl8 A\uD800B -1 {High-surrogate-middle} utf-32le \x41\x00\x00\x00\x00\xD8\x00\x00\x42\x00\x00\x00 strict A 4 {High-surrogate-middle} } @@ -416,18 +482,14 @@ set utf32-le-TODO { # TODO - other encodings and test cases # TODO - out of range code point (note cannot be generated by \U notation) set encUnencodableStrings { - ascii \u00e0 default 3f -1 {} {unencodable} ascii \u00e0 tcl8 3f -1 {} {unencodable} ascii \u00e0 strict {} 0 {} {unencodable} - iso8859-1 \u0141 default 3f -1 {} unencodable iso8859-1 \u0141 tcl8 3f -1 {} unencodable iso8859-1 \u0141 strict {} 0 {} unencodable - utf-8 \uD800 default eda080 -1 {} High-surrogate utf-8 \uD800 tcl8 eda080 -1 {} High-surrogate utf-8 \uD800 strict {} 0 {} High-surrogate - utf-8 \uDC00 default edb080 -1 {} High-surrogate utf-8 \uDC00 tcl8 edb080 -1 {} High-surrogate utf-8 \uDC00 strict {} 0 {} High-surrogate } @@ -453,6 +515,24 @@ proc endianUtf {enc} { return "" } +# Map arbitrary strings to printable form in ASCII. +proc printable {s} { + set print "" + foreach c [split $s ""] { + set i [scan $c %c] + if {[string is print $c] && ($i <= 127)} { + append print $c + } elseif {$i <= 0xff} { + append print \\x[format %02X $i] + } elseif {$i <= 0xffff} { + append print \\u[format %04X $i] + } else { + append print \\U[format %08X $i] + } + } + return $print +} + # # Check errors for invalid number of arguments proc badnumargs {id cmd cmdargs} { @@ -501,36 +581,45 @@ proc testconvert {id body result args} { {*}$args } +# Wrapper to verify encoding convert{to,from} ?-profile? +# Generates tests for compiled and uncompiled implementation. +# Also generates utf-{16,32} tests if passed encoding is utf-{16,32}{le,be} +# The enc and profile are appended to id to generate the test id proc testprofile {id converter enc profile data result args} { - if {$profile eq "default"} { - testconvert $id.$enc.$profile [list encoding $converter $enc $data] $result {*}$args - if {[set enc [endianUtf $enc]] ne ""} { - # If utf{16,32}-{le,be}, also do utf{16,32} - testconvert $id.$enc.$profile [list encoding $converter $enc $data] $result {*}$args - } - } else { - testconvert $id.$enc.$profile [list encoding $converter -profile $profile $enc $data] $result {*}$args - if {[set enc [endianUtf $enc]] ne ""} { + testconvert $id.$enc.$profile [list encoding $converter -profile $profile $enc $data] $result {*}$args + if {[set enc2 [endianUtf $enc]] ne ""} { + # If utf{16,32}-{le,be}, also do utf{16,32} + testconvert $id.$enc2.$profile [list encoding $converter -profile $profile $enc2 $data] $result {*}$args + } + + # If this is the default profile, generate a test without specifying profile + if {$profile eq $::encDefaultProfile} { + testconvert $id.$enc.default [list encoding $converter $enc $data] $result {*}$args + if {[set enc2 [endianUtf $enc]] ne ""} { # If utf{16,32}-{le,be}, also do utf{16,32} - testconvert $id.$enc.$profile [list encoding $converter -profile $profile $enc $data] $result {*}$args + testconvert $id.$enc2.default [list encoding $converter $enc2 $data] $result {*}$args } } } -# Wrapper for verifying -failindex +# Wrapper to verify encoding convert{to,from} -failindex ?-profile? +# Generates tests for compiled and uncompiled implementation. +# Also generates utf-{16,32} tests if passed encoding is utf-{16,32}{le,be} +# The enc and profile are appended to id to generate the test id proc testfailindex {id converter enc data result {profile default}} { - if {$profile eq "default"} { - testconvert $id.$enc.$profile "list \[encoding $converter -failindex idx $enc $data] \[set idx]" $result - if {[set enc [endianUtf $enc]] ne ""} { - # If utf{16,32}-{le,be}, also do utf{16,32} - testconvert $id.$enc.$profile "list \[encoding $converter -failindex idx $enc $data] \[set idx]" $result - } - } else { - testconvert $id.$enc.$profile "list \[encoding $converter -profile $profile -failindex idx $enc $data] \[set idx]" $result - if {[set enc [endianUtf $enc]] ne ""} { + testconvert $id.$enc.$profile "list \[encoding $converter -profile $profile -failindex idx $enc $data] \[set idx]" $result + if {[set enc2 [endianUtf $enc]] ne ""} { + # If utf{16,32}-{le,be}, also do utf{16,32} + testconvert $id.$enc2.$profile "list \[encoding $converter -profile $profile -failindex idx $enc2 $data] \[set idx]" $result + } + + # If this is the default profile, generate a test without specifying profile + if {$profile eq $::encDefaultProfile} { + testconvert $id.$enc.default "list \[encoding $converter -failindex idx $enc $data] \[set idx]" $result + if {[set enc2 [endianUtf $enc]] ne ""} { # If utf{16,32}-{le,be}, also do utf{16,32} - testconvert $id.$enc.$profile "list \[encoding $converter -profile $profile -failindex idx $enc $data] \[set idx]" $result + testconvert $id.$enc2.default "list \[encoding $converter -failindex idx $enc2 $data] \[set idx]" $result } } } @@ -590,9 +679,7 @@ testconvert cmdAH-4.3.12 { encoding system $system } -# convertfrom, convertfrom -profile - -# convertfrom ?-profile? : All valid byte sequences should be accepted by all profiles +# convertfrom ?-profile? : valid byte sequences foreach {enc str hex} $encValidStrings { set bytes [binary decode hex $hex] set prefix A @@ -612,7 +699,9 @@ foreach {enc hex profile str failidx ctrl comment} $encInvalidBytes { set bytes [binary format H* $hex] set prefix A set suffix B - set prefixLen [string length [encoding convertto $enc $prefix]] + set prefix_bytes [encoding convertto $enc $prefix] + set suffix_bytes [encoding convertto $enc $suffix] + set prefixLen [string length $prefix_bytes] set result [list $str] # TODO - if the bad byte is unprintable, tcltest errors out when printing a mismatch # so glob it out in error message pattern for now. @@ -624,7 +713,7 @@ foreach {enc hex profile str failidx ctrl comment} $encInvalidBytes { } else { set result $errorWithoutPrefix } - testprofile cmdAH-4.3.15.$hex.solo convertfrom $enc $profile $bytes {*}$result + testprofile cmdAH-4.3.13.$hex.solo convertfrom $enc $profile $bytes {*}$result } if {$ctrl eq {} || "lead" in $ctrl} { if {$failidx == -1} { @@ -632,7 +721,7 @@ foreach {enc hex profile str failidx ctrl comment} $encInvalidBytes { } else { set result $errorWithoutPrefix } - testprofile cmdAH-4.3.15.$hex.lead convertfrom $enc $profile $bytes$suffix {*}$result + testprofile cmdAH-4.3.13.$hex.lead convertfrom $enc $profile $bytes$suffix_bytes {*}$result } if {$ctrl eq {} || "tail" in $ctrl} { if {$failidx == -1} { @@ -640,7 +729,7 @@ foreach {enc hex profile str failidx ctrl comment} $encInvalidBytes { } else { set result $errorWithPrefix } - testprofile cmdAH-4.3.15.$hex.tail convertfrom $enc $profile $prefix$bytes {*}$result + testprofile cmdAH-4.3.13.$hex.tail convertfrom $enc $profile $prefix_bytes$bytes {*}$result } if {$ctrl eq {} || "middle" in $ctrl} { if {$failidx == -1} { @@ -648,28 +737,11 @@ foreach {enc hex profile str failidx ctrl comment} $encInvalidBytes { } else { set result $errorWithPrefix } - testprofile cmdAH-4.3.15.$hex.middle convertfrom $enc $profile $prefix$bytes$suffix {*}$result + testprofile cmdAH-4.3.13.$hex.middle convertfrom $enc $profile $prefix_bytes$bytes$suffix_bytes {*}$result } } -proc printable {s} { - set print "" - foreach c [split $s ""] { - set i [scan $c %c] - if {[string is print $c] && ($i <= 127)} { - append print $c - } elseif {$i <= 0xff} { - append print \\x[format %02X $i] - } elseif {$i <= 0xffff} { - append print \\u[format %04X $i] - } else { - append print \\U[format %08X $i] - } - } - return $print -} - -# convertfrom -failindex - valid data +# convertfrom -failindex ?-profile? - valid data foreach {enc str hex} $encValidStrings { set bytes [binary decode hex $hex] set prefix A @@ -677,15 +749,14 @@ foreach {enc str hex} $encValidStrings { set prefix_bytes [encoding convertto $enc A] set suffix_bytes [encoding convertto $enc B] foreach profile $encProfiles { - testfailindex cmdAH-4.3.13.$hex.solo convertfrom $enc $bytes [list $str -1] $profile - testfailindex cmdAH-4.3.13.$hex.lead convertfrom $enc $bytes$suffix_bytes [list $str$suffix -1] $profile - testfailindex cmdAH-4.3.13.$hex.tail convertfrom $enc $prefix_bytes$bytes [list $prefix$str -1] $profile - testfailindex cmdAH-4.3.13.$hex.middle convertfrom $enc $prefix_bytes$bytes$suffix_bytes [list $prefix$str$suffix -1] $profile + testfailindex cmdAH-4.3.14.$hex.solo convertfrom $enc $bytes [list $str -1] $profile + testfailindex cmdAH-4.3.14.$hex.lead convertfrom $enc $bytes$suffix_bytes [list $str$suffix -1] $profile + testfailindex cmdAH-4.3.14.$hex.tail convertfrom $enc $prefix_bytes$bytes [list $prefix$str -1] $profile + testfailindex cmdAH-4.3.14.$hex.middle convertfrom $enc $prefix_bytes$bytes$suffix_bytes [list $prefix$str$suffix -1] $profile } } - -# convertfrom -failindex, convertfrom -failindex -profile, invalid data +# convertfrom -failindex ?-profile? - invalid data foreach {enc hex profile str failidx ctrl comment} $encInvalidBytes { # There are multiple test cases based on location of invalid bytes set bytes [binary decode hex $hex] @@ -765,19 +836,96 @@ testconvert cmdAH-4.4.12 { encoding system $system } -# -failindex - valid data -foreach {enc string bytes} $encValidStrings { - testfailindex cmdAH-4.4.13.$enc convertto $enc $string [list $bytes -1] +# convertto ?-profile? : valid byte sequences + +foreach {enc str hex} $encValidStrings { + set bytes [binary decode hex $hex] + set printable [printable $str] + set prefix A + set suffix B + set prefix_bytes [encoding convertto $enc A] + set suffix_bytes [encoding convertto $enc B] + foreach profile $encProfiles { + testprofile cmdAH-4.4.13.$printable.solo convertto $enc $profile $str $bytes + testprofile cmdAH-4.4.13.$printable.lead convertto $enc $profile $str$suffix $bytes$suffix_bytes + testprofile cmdAH-4.4.13.$printable.tail convertto $enc $profile $prefix$str $prefix_bytes$bytes + testprofile cmdAH-4.4.13.$printable.middle convertto $enc $profile $prefix$str$suffix $prefix_bytes$bytes$suffix_bytes + } +} + +# convertto ?-profile? : invalid byte sequences +foreach {enc str profile hex failidx ctrl comment} $encUnencodableStrings { + set bytes [binary decode hex $hex] + set printable [printable $str] + set prefix A + set suffix B + set prefix_bytes [encoding convertto $enc $prefix] + set suffix_bytes [encoding convertto $enc $suffix] + set prefixLen [string length $prefix_bytes] + set result [list $bytes] + # TODO - if the bad byte is unprintable, tcltest errors out when printing a mismatch + # so glob it out in error message pattern for now. + set errorWithoutPrefix [list "unexpected character at index $failidx: *" -returnCodes error -match glob] + set errorWithPrefix [list "unexpected character at index [expr {$failidx+$prefixLen}]: *" -returnCodes error -match glob] + if {$ctrl eq {} || "solo" in $ctrl} { + if {$failidx == -1} { + set result [list $bytes] + } else { + set result $errorWithoutPrefix + } + testprofile cmdAH-4.4.13.$printable.solo convertto $enc $profile $str {*}$result + } + if {$ctrl eq {} || "lead" in $ctrl} { + if {$failidx == -1} { + set result [list $bytes$suffix_bytes] + } else { + set result $errorWithoutPrefix + } + testprofile cmdAH-4.4.13.$printable.lead convertto $enc $profile $str$suffix {*}$result + } + if {$ctrl eq {} || "tail" in $ctrl} { + if {$failidx == -1} { + set result [list $prefix_bytes$bytes] + } else { + set result $errorWithPrefix + } + testprofile cmdAH-4.4.13.$printable.tail convertto $enc $profile $prefix$str {*}$result + } + if {$ctrl eq {} || "middle" in $ctrl} { + if {$failidx == -1} { + set result [list $prefix_bytes$bytes$suffix_bytes] + } else { + set result $errorWithPrefix + } + testprofile cmdAH-4.4.13.$printable.middle convertto $enc $profile $prefix$str$suffix {*}$result + } } -# -failindex - invalid data -foreach {enc string profile hex failidx ctrl comment} $encUnencodableStrings { +# convertto -failindex ?-profile? - valid data +foreach {enc str hex} $encValidStrings { set bytes [binary decode hex $hex] + set printable [printable $str] + set prefix A + set suffix B + set prefix_bytes [encoding convertto $enc A] + set suffix_bytes [encoding convertto $enc B] + foreach profile $encProfiles { + testfailindex cmdAH-4.4.14.$enc.$printable.solo convertto $enc $str [list $bytes -1] $profile + testfailindex cmdAH-4.4.14.$enc.$printable.lead convertto $enc $str$suffix [list $bytes$suffix_bytes -1] $profile + testfailindex cmdAH-4.4.14.$enc.$printable.tail convertto $enc $prefix$str [list $prefix_bytes$bytes -1] $profile + testfailindex cmdAH-4.4.14.$enc.$printable.middle convertto $enc $prefix$str$suffix [list $prefix_bytes$bytes$suffix_bytes -1] $profile + } +} + +# convertto -failindex ?-profile? - invalid data +foreach {enc str profile hex failidx ctrl comment} $encUnencodableStrings { + set bytes [binary decode hex $hex] + set printable [printable $str] set prefix A set suffix B set prefixLen [string length [encoding convertto $enc $prefix]] if {$ctrl eq {} || "solo" in $ctrl} { - testfailindex cmdAH-4.4.14.$string.solo convertto $enc $string [list $bytes $failidx] $profile + testfailindex cmdAH-4.4.14.$printable.solo convertto $enc $str [list $bytes $failidx] $profile } if {$ctrl eq {} || "lead" in $ctrl} { if {$failidx == -1} { @@ -787,7 +935,7 @@ foreach {enc string profile hex failidx ctrl comment} $encUnencodableStrings { # Failure expected set result "" } - testfailindex cmdAH-4.4.14.$string.lead convertto $enc $string$suffix [list $result $failidx] $profile + testfailindex cmdAH-4.4.14.$printable.lead convertto $enc $str$suffix [list $result $failidx] $profile } if {$ctrl eq {} || "tail" in $ctrl} { set expected_failidx $failidx @@ -799,7 +947,7 @@ foreach {enc string profile hex failidx ctrl comment} $encUnencodableStrings { set result $prefix incr expected_failidx $prefixLen } - testfailindex cmdAH-4.4.14.$string.tail convertto $enc $prefix$string [list $result $expected_failidx] $profile + testfailindex cmdAH-4.4.14.$printable.tail convertto $enc $prefix$str [list $result $expected_failidx] $profile } if {$ctrl eq {} || "middle" in $ctrl} { set expected_failidx $failidx @@ -811,46 +959,7 @@ foreach {enc string profile hex failidx ctrl comment} $encUnencodableStrings { set result $prefix incr expected_failidx $prefixLen } - testfailindex cmdAH-4.4.14.$string.middle convertto $enc $prefix$string$suffix [list $result $expected_failidx] $profile - } -} - -# convertto -profile - -# All valid byte sequences should be accepted by all profiles -foreach profile $encProfiles { - set i 0 - foreach {enc string bytes} $encValidStrings { - testprofile cmdAH-4.4.15 convertto $enc $profile $string $bytes - } -} - -# Cycle through the various combinations of encodings and profiles -# for invalid byte sequences -foreach {enc string profile hex failidx ctrl comment} $encUnencodableStrings { - set bytes [binary decode hex $hex] - if {$failidx eq -1} { - set result [list $bytes] - } else { - # TODO - if the bad char is unprintable, tcltest errors out when printing a mismatch - # so glob it out for now. - set result [list "unexpected character at index $failidx: *" -returnCodes error -match glob] - } - #testprofile xx convertto $enc $profile $string {*}$result - if {$profile eq "default"} { - # testconvert cmdAH-4.4.15.$enc.$profile.$tag [list encoding convertto $enc $string] {*}$result - if {"utf-16$endian" eq $enc} { - # utf-16le ->utf-16, utf-32be -> utf32 etc. - set enc [string range $enc 0 5] - # xxtestconvert cmdAH-4.4.15.$enc.$profile.$tag [list encoding convertto $enc $string] {*}$result - } - } else { - # testconvert cmdAH-4.4.15.$enc.$profile.$tag [list encoding convertto -profile $profile $enc $string] {*}$result - if {"utf-16$endian" eq $enc} { - # utf-16le ->utf-16, utf-32be -> utf32 etc. - set enc [string range $enc 0 5] - # testconvert cmdAH-4.4.15.$enc.$profile.$tag [list encoding convertto -profile $profile $enc $string] {*}$result - } + testfailindex cmdAH-4.4.14.$printable.middle convertto $enc $prefix$str$suffix [list $result $expected_failidx] $profile } } -- cgit v0.12 From 3d2dc708451191d04cca00561cbed0295a407b11 Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Sat, 18 Feb 2023 16:25:57 +0000 Subject: Done with invalid utf-8 table --- tests/cmdAH.test | 278 +++++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 241 insertions(+), 37 deletions(-) diff --git a/tests/cmdAH.test b/tests/cmdAH.test index df28b2e..ad315d2 100644 --- a/tests/cmdAH.test +++ b/tests/cmdAH.test @@ -344,11 +344,17 @@ lappend encInvalidBytes {*}{ utf-8 E0 replace \uFFFD -1 {} {Missing trail byte} utf-8 E0 strict {} 0 {} {Missing trail byte} utf-8 E080 tcl8 \u00E0\u20AC -1 {knownBug} {First trail byte must be A0:BF} - utf-8 E080 replace \uFFFD\uFFFD -1 {} {First trail byte must be A0:BF} - utf-8 E080 strict {} 0 {} {First trail byte must be A0:BF} + utf-8 E080 replace \uFFFD\uFFFD -1 {} {First trail byte must be A0:BF} + utf-8 E080 strict {} 0 {} {First trail byte must be A0:BF} utf-8 E09F tcl8 \u00E0\u0178 -1 {knownBug} {First trail byte must be A0:BF} - utf-8 E09F replace \uFFFD\uFFFD -1 {} {First trail byte must be A0:BF} - utf-8 E09F strict {} 0 {} {First trail byte must be A0:BF} + utf-8 E09F replace \uFFFD\uFFFD -1 {} {First trail byte must be A0:BF} + utf-8 E09F strict {} 0 {} {First trail byte must be A0:BF} + utf-8 E0A0 tcl8 \u00E0\u00A0 -1 {} {Missing second trail byte} + utf-8 E0A0 replace \uFFFD -1 {knownW3C} {Missing second trail byte} + utf-8 E0A0 strict {} 0 {} {Missing second trail byte} + utf-8 E0BF tcl8 \u00E0\u00BF -1 {} {Missing second trail byte} + utf-8 E0BF replace \uFFFD -1 {knownW3C} {Missing second trail byte} + utf-8 E0BF strict {} 0 {} {Missing second trail byte} utf-8 E0A07F tcl8 \u00E0\u00A0\x7F -1 {} {Second trail byte must be 80:BF} utf-8 E0A07F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} utf-8 E0A07F strict {} 0 {} {Second trail byte must be 80:BF} @@ -362,6 +368,12 @@ lappend encInvalidBytes {*}{ utf-8 E17F tcl8 \u00E1\x7F -1 {} {Trail byte must be 80:BF} utf-8 E17F replace \uFFFD\x7F -1 {} {Trail byte must be 80:BF} utf-8 E17F strict {} 0 {} {Trail byte must be 80:BF} + utf-8 E181 tcl8 \u00E1\u0081 -1 {} {Missing second trail byte} + utf-8 E181 replace \uFFFD -1 {knownW3C} {Missing second trail byte} + utf-8 E181 strict {} 0 {} {Missing second trail byte} + utf-8 E1BF tcl8 \u00E1\u00BF -1 {} {Missing second trail byte} + utf-8 E1BF replace \uFFFD -1 {knownW3C} {Missing second trail byte} + utf-8 E1BF strict {} 0 {} {Missing second trail byte} utf-8 E1807F tcl8 \u00E1\u20AC\x7F -1 {knownBug} {Second trail byte must be 80:BF} utf-8 E1807F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} utf-8 E1807F strict {} 0 {} {Second trail byte must be 80:BF} @@ -374,6 +386,12 @@ lappend encInvalidBytes {*}{ utf-8 EC7F tcl8 \u00EC\x7F -1 {} {Trail byte must be 80:BF} utf-8 EC7F replace \uFFFD\x7F -1 {} {Trail byte must be 80:BF} utf-8 EC7F strict {} 0 {} {Trail byte must be 80:BF} + utf-8 EC81 tcl8 \u00EC\u0081 -1 {} {Missing second trail byte} + utf-8 EC81 replace \uFFFD -1 {knownW3C} {Missing second trail byte} + utf-8 EC81 strict {} 0 {} {Missing second trail byte} + utf-8 ECBF tcl8 \u00EC\u00BF -1 {} {Missing second trail byte} + utf-8 ECBF replace \uFFFD -1 {knownW3C} {Missing second trail byte} + utf-8 ECBF strict {} 0 {} {Missing second trail byte} utf-8 EC807F tcl8 \u00EC\u20AC\x7F -1 {knownBug} {Second trail byte must be 80:BF} utf-8 EC807F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} utf-8 EC807F strict {} 0 {} {Second trail byte must be 80:BF} @@ -381,39 +399,225 @@ lappend encInvalidBytes {*}{ utf-8 ECBF7F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} utf-8 ECBF7F strict {} 0 {} {Second trail byte must be 80:BF} - utf-8 ED tcl8 \u00ED -1 {} {Missing trail byte} - utf-8 ED replace \uFFFD -1 {} {Missing trail byte} - utf-8 ED strict {} 0 {} {Missing trail byte} - utf-8 ED7F tcl8 \u00ED\u7F -1 {knownBug} {First trail byte must be 80:9F} - utf-8 ED7F replace \uFFFD\u7F -1 {} {First trail byte must be 80:9F} - utf-8 ED7F strict {} 0 {} {First trail byte must be 80:9F} - utf-8 EDA0 tcl8 \u00ED\u00A0 -1 {knownBug} {First trail byte must be 80:9F} - utf-8 EDA0 replace \uFFFD\uFFFD -1 {} {First trail byte must be 80:9F} - utf-8 EDA0 strict {} 0 {} {First trail byte must be 80:9F} - utf-8 ED807F tcl8 \u00ED\u20AC\x7F -1 {knownBug} {Second trail byte must be 80:BF} - utf-8 ED807F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} - utf-8 ED807F strict {} 0 {} {Second trail byte must be 80:BF} - utf-8 ED9F7F tcl8 \u00ED\u0178\x7F -1 {knownBug} {Second trail byte must be 80:BF} - utf-8 ED9F7F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} - utf-8 ED9F7F strict {} 0 {} {Second trail byte must be 80:BF} - utf-8 EDA080 tcl8 \uD800 -1 {} {High surrogate} - utf-8 EDA080 replace \uFFFD -1 {} {High surrogate} - utf-8 EDA080 strict {} 0 {} {High surrogate} - utf-8 EDAFBF tcl8 \uDBFF -1 {} {High surrogate} - utf-8 EDAFBF replace \uFFFD -1 {} {High surrogate} - utf-8 EDAFBF strict {} 0 {} {High surrogate} - utf-8 EDB080 tcl8 \uDC00 -1 {} {Low surrogate} - utf-8 EDB080 replace \uFFFD -1 {} {Low surrogate} - utf-8 EDB080 strict {} 0 {} {Low surrogate} - utf-8 EDBFBF tcl8 \uDFFF -1 {} {Low surrogate} - utf-8 EDBFBF replace \uFFFD -1 {} {Low surrogate} - utf-8 EDBFBF strict {} 0 {} {Low surrogate} - utf-8 EDA080EDB080 tcl8 \U00010000 -1 {} {High low surrogate pair} - utf-8 EDA080EDB080 replace \uFFFD\uFFFD -1 {} {High low surrogate pair} - utf-8 EDA080EDB080 strict {} 0 {} {High low surrogate pair} - utf-8 EDAFBFEDBFBF tcl8 \U0010FFFF -1 {} {High low surrogate pair} - utf-8 EDAFBFEDBFBF replace \uFFFD\uFFFD -1 {} {High low surrogate pair} - utf-8 EDAFBFEDBFBF strict {} 0 {} {High low surrogate pair} + utf-8 ED tcl8 \u00ED -1 {} {Missing trail byte} + utf-8 ED replace \uFFFD -1 {} {Missing trail byte} + utf-8 ED strict {} 0 {} {Missing trail byte} + utf-8 ED7F tcl8 \u00ED\u7F -1 {knownBug} {First trail byte must be 80:9F} + utf-8 ED7F replace \uFFFD\u7F -1 {} {First trail byte must be 80:9F} + utf-8 ED7F strict {} 0 {} {First trail byte must be 80:9F} + utf-8 EDA0 tcl8 \u00ED\u00A0 -1 {knownBug} {First trail byte must be 80:9F} + utf-8 EDA0 replace \uFFFD\uFFFD -1 {} {First trail byte must be 80:9F} + utf-8 EDA0 strict {} 0 {} {First trail byte must be 80:9F} + utf-8 ED81 tcl8 \u00ED\u0081 -1 {} {Missing second trail byte} + utf-8 ED81 replace \uFFFD -1 {knownW3C} {Missing second trail byte} + utf-8 ED81 strict {} 0 {} {Missing second trail byte} + utf-8 EDBF tcl8 \u00ED\u00BF -1 {} {Missing second trail byte} + utf-8 EDBF replace \uFFFD -1 {knownW3C} {Missing second trail byte} + utf-8 EDBF strict {} 0 {} {Missing second trail byte} + utf-8 ED807F tcl8 \u00ED\u20AC\x7F -1 {knownBug} {Second trail byte must be 80:BF} + utf-8 ED807F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} + utf-8 ED807F strict {} 0 {} {Second trail byte must be 80:BF} + utf-8 ED9F7F tcl8 \u00ED\u0178\x7F -1 {knownBug} {Second trail byte must be 80:BF} + utf-8 ED9F7F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} + utf-8 ED9F7F strict {} 0 {} {Second trail byte must be 80:BF} + utf-8 EDA080 tcl8 \uD800 -1 {} {High surrogate} + utf-8 EDA080 replace \uFFFD -1 {} {High surrogate} + utf-8 EDA080 strict {} 0 {} {High surrogate} + utf-8 EDAFBF tcl8 \uDBFF -1 {} {High surrogate} + utf-8 EDAFBF replace \uFFFD -1 {} {High surrogate} + utf-8 EDAFBF strict {} 0 {} {High surrogate} + utf-8 EDB080 tcl8 \uDC00 -1 {} {Low surrogate} + utf-8 EDB080 replace \uFFFD -1 {} {Low surrogate} + utf-8 EDB080 strict {} 0 {} {Low surrogate} + utf-8 EDBFBF tcl8 \uDFFF -1 {} {Low surrogate} + utf-8 EDBFBF replace \uFFFD -1 {} {Low surrogate} + utf-8 EDBFBF strict {} 0 {} {Low surrogate} + utf-8 EDA080EDB080 tcl8 \U00010000 -1 {} {High low surrogate pair} + utf-8 EDA080EDB080 replace \uFFFD\uFFFD -1 {} {High low surrogate pair} + utf-8 EDA080EDB080 strict {} 0 {} {High low surrogate pair} + utf-8 EDAFBFEDBFBF tcl8 \U0010FFFF -1 {} {High low surrogate pair} + utf-8 EDAFBFEDBFBF replace \uFFFD\uFFFD -1 {} {High low surrogate pair} + utf-8 EDAFBFEDBFBF strict {} 0 {} {High low surrogate pair} + + utf-8 EE tcl8 \u00EE -1 {} {Missing trail byte} + utf-8 EE replace \uFFFD -1 {} {Missing trail byte} + utf-8 EE strict {} 0 {} {Missing trail byte} + utf-8 EE7F tcl8 \u00EE\u7F -1 {knownBug} {First trail byte must be 80:BF} + utf-8 EE7F replace \uFFFD\u7F -1 {} {First trail byte must be 80:BF} + utf-8 EE7F strict {} 0 {} {First trail byte must be 80:BF} + utf-8 EED0 tcl8 \u00EE\u00D0 -1 {knownBug} {First trail byte must be 80:BF} + utf-8 EED0 replace \uFFFD\uFFFD -1 {} {First trail byte must be 80:BF} + utf-8 EED0 strict {} 0 {} {First trail byte must be 80:BF} + utf-8 EE81 tcl8 \u00EE\u0081 -1 {} {Missing second trail byte} + utf-8 EE81 replace \uFFFD -1 {knownW3C} {Missing second trail byte} + utf-8 EE81 strict {} 0 {} {Missing second trail byte} + utf-8 EEBF tcl8 \u00EE\u00BF -1 {} {Missing second trail byte} + utf-8 EEBF replace \uFFFD -1 {knownW3C} {Missing second trail byte} + utf-8 EEBF strict {} 0 {} {Missing second trail byte} + utf-8 EE807F tcl8 \u00EE\u20AC\x7F -1 {knownBug} {Second trail byte must be 80:BF} + utf-8 EE807F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} + utf-8 EE807F strict {} 0 {} {Second trail byte must be 80:BF} + utf-8 EEBF7F tcl8 \u00EE\u00BF\x7F -1 {} {Second trail byte must be 80:BF} + utf-8 EEBF7F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} + utf-8 EEBF7F strict {} 0 {} {Second trail byte must be 80:BF} + utf-8 EF tcl8 \u00EF -1 {} {Missing trail byte} + utf-8 EF replace \uFFFD -1 {} {Missing trail byte} + utf-8 EF strict {} 0 {} {Missing trail byte} + utf-8 EF7F tcl8 \u00EF\u7F -1 {knownBug} {First trail byte must be 80:BF} + utf-8 EF7F replace \uFFFD\u7F -1 {} {First trail byte must be 80:BF} + utf-8 EF7F strict {} 0 {} {First trail byte must be 80:BF} + utf-8 EFD0 tcl8 \u00EF\u00D0 -1 {knownBug} {First trail byte must be 80:BF} + utf-8 EFD0 replace \uFFFD\uFFFD -1 {} {First trail byte must be 80:BF} + utf-8 EFD0 strict {} 0 {} {First trail byte must be 80:BF} + utf-8 EF81 tcl8 \u00EF\u0081 -1 {} {Missing second trail byte} + utf-8 EF81 replace \uFFFD -1 {knownW3C} {Missing second trail byte} + utf-8 EF81 strict {} 0 {} {Missing second trail byte} + utf-8 EFBF tcl8 \u00EF\u00BF -1 {} {Missing second trail byte} + utf-8 EFBF replace \uFFFD -1 {knownW3C} {Missing second trail byte} + utf-8 EFBF strict {} 0 {} {Missing second trail byte} + utf-8 EF807F tcl8 \u00EF\u20AC\x7F -1 {knownBug} {Second trail byte must be 80:BF} + utf-8 EF807F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} + utf-8 EF807F strict {} 0 {} {Second trail byte must be 80:BF} + utf-8 EFBF7F tcl8 \u00EF\u00BF\x7F -1 {} {Second trail byte must be 80:BF} + utf-8 EFBF7F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} + utf-8 EFBF7F strict {} 0 {} {Second trail byte must be 80:BF} + + utf-8 F0 tcl8 \u00F0 -1 {} {Missing trail byte} + utf-8 F0 replace \uFFFD -1 {} {Missing trail byte} + utf-8 F0 strict {} 0 {} {Missing trail byte} + utf-8 F08F tcl8 \u00F0\u8F -1 {knownBug} {First trail byte must be 90:BF} + utf-8 F08F replace \uFFFD -1 {knownW3C} {First trail byte must be 90:BF} + utf-8 F08F strict {} 0 {} {First trail byte must be 90:BF} + utf-8 F0D0 tcl8 \u00F0\u00D0 -1 {knownBug} {First trail byte must be 90:BF} + utf-8 F0D0 replace \uFFFD\uFFFD -1 {} {First trail byte must be 90:BF} + utf-8 F0D0 strict {} 0 {} {First trail byte must be 90:BF} + utf-8 F090 tcl8 \u00F0\u0090 -1 {} {Missing second trail byte} + utf-8 F090 replace \uFFFD -1 {knownW3C} {Missing second trail byte} + utf-8 F090 strict {} 0 {} {Missing second trail byte} + utf-8 F0BF tcl8 \u00F0\u00BF -1 {} {Missing second trail byte} + utf-8 F0BF replace \uFFFD -1 {knownW3C} {Missing second trail byte} + utf-8 F0BF strict {} 0 {} {Missing second trail byte} + utf-8 F0907F tcl8 \u00F0\u0090\x7F -1 {knownBug} {Second trail byte must be 80:BF} + utf-8 F0907F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} + utf-8 F0907F strict {} 0 {} {Second trail byte must be 80:BF} + utf-8 F0BF7F tcl8 \u00F0\u00BF\x7F -1 {} {Second trail byte must be 80:BF} + utf-8 F0BF7F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} + utf-8 F0BF7F strict {} 0 {} {Second trail byte must be 80:BF} + utf-8 F090BF tcl8 \u00F0\u0090\u00BF -1 {} {Missing third trail byte} + utf-8 F090BF replace \uFFFD -1 {knownW3C} {Missing third trail byte} + utf-8 F090BF strict {} 0 {} {Missing third trail byte} + utf-8 F0BF81 tcl8 \u00F0\u00BF\u0081 -1 {} {Missing third trail byte} + utf-8 F0BF81 replace \uFFFD -1 {knownW3C} {Missing third trail byte} + utf-8 F0BF81 strict {} 0 {} {Missing third trail byte} + utf-8 F0BF807F tcl8 \u00F0\u00BF\u20AC\x7F -1 {knownBug} {Third trail byte must be 80:BF} + utf-8 F0BF817F replace \uFFFD\x7F -1 {knownW3C} {Third trail byte must be 80:BF} + utf-8 F0BF817F strict {} 0 {} {Third trail byte must be 80:BF} + utf-8 F090BFD0 tcl8 \u00F0\u0090\u00BF\u00D0 -1 {} {Third trail byte must be 80:BF} + utf-8 F090BFD0 replace \uFFFD -1 {knownW3C} {Third trail byte must be 80:BF} + utf-8 F090BFD0 strict {} 0 {} {Third trail byte must be 80:BF} + + utf-8 F1 tcl8 \u00F1 -1 {} {Missing trail byte} + utf-8 F1 replace \uFFFD -1 {} {Missing trail byte} + utf-8 F1 strict {} 0 {} {Missing trail byte} + utf-8 F17F tcl8 \u00F1\u8F -1 {knownBug} {First trail byte must be 80:BF} + utf-8 F17F replace \uFFFD -1 {knownW3C} {First trail byte must be 80:BF} + utf-8 F17F strict {} 0 {} {First trail byte must be 80:BF} + utf-8 F1D0 tcl8 \u00F1\u00D0 -1 {knownBug} {First trail byte must be 80:BF} + utf-8 F1D0 replace \uFFFD\uFFFD -1 {} {First trail byte must be 80:BF} + utf-8 F1D0 strict {} 0 {} {First trail byte must be 80:BF} + utf-8 F180 tcl8 \u00F1\u0080 -1 {} {Missing second trail byte} + utf-8 F180 replace \uFFFD -1 {knownW3C} {Missing second trail byte} + utf-8 F180 strict {} 0 {} {Missing second trail byte} + utf-8 F1BF tcl8 \u00F1\u00BF -1 {} {Missing second trail byte} + utf-8 F1BF replace \uFFFD -1 {knownW3C} {Missing second trail byte} + utf-8 F1BF strict {} 0 {} {Missing second trail byte} + utf-8 F1807F tcl8 \u00F1\u20AC\x7F -1 {knownBug} {Second trail byte must be 80:BF} + utf-8 F1807F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} + utf-8 F1807F strict {} 0 {} {Second trail byte must be 80:BF} + utf-8 F1BF7F tcl8 \u00F1\u00BF\x7F -1 {} {Second trail byte must be 80:BF} + utf-8 F1BF7F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} + utf-8 F1BF7F strict {} 0 {} {Second trail byte must be 80:BF} + utf-8 F180BF tcl8 \u00F1\u20AC\u00BF -1 {knownBug} {Missing third trail byte} + utf-8 F180BF replace \uFFFD -1 {knownW3C} {Missing third trail byte} + utf-8 F180BF strict {} 0 {} {Missing third trail byte} + utf-8 F1BF81 tcl8 \u00F1\u00BF\u0081 -1 {} {Missing third trail byte} + utf-8 F1BF81 replace \uFFFD -1 {knownW3C} {Missing third trail byte} + utf-8 F1BF81 strict {} 0 {} {Missing third trail byte} + utf-8 F1BF807F tcl8 \u00F1\u00BF\u20AC\x7F -1 {knownBug} {Third trail byte must be 80:BF} + utf-8 F1BF817F replace \uFFFD\x7F -1 {knownW3C} {Third trail byte must be 80:BF} + utf-8 F1BF817F strict {} 0 {} {Third trail byte must be 80:BF} + utf-8 F180BFD0 tcl8 \u00F1\u20AC\u00BF\u00D0 -1 {} {Third trail byte must be 80:BF} + utf-8 F180BFD0 replace \uFFFD -1 {knownW3C} {Third trail byte must be 80:BF} + utf-8 F180BFD0 strict {} 0 {} {Third trail byte must be 80:BF} + utf-8 F3 tcl8 \u00F3 -1 {} {Missing trail byte} + utf-8 F3 replace \uFFFD -1 {} {Missing trail byte} + utf-8 F3 strict {} 0 {} {Missing trail byte} + utf-8 F37F tcl8 \u00F3\u8F -1 {knownBug} {First trail byte must be 80:BF} + utf-8 F37F replace \uFFFD -1 {knownW3C} {First trail byte must be 80:BF} + utf-8 F37F strict {} 0 {} {First trail byte must be 80:BF} + utf-8 F3D0 tcl8 \u00F3\u00D0 -1 {knownBug} {First trail byte must be 80:BF} + utf-8 F3D0 replace \uFFFD\uFFFD -1 {} {First trail byte must be 80:BF} + utf-8 F3D0 strict {} 0 {} {First trail byte must be 80:BF} + utf-8 F380 tcl8 \u00F3\u0080 -1 {} {Missing second trail byte} + utf-8 F380 replace \uFFFD -1 {knownW3C} {Missing second trail byte} + utf-8 F380 strict {} 0 {} {Missing second trail byte} + utf-8 F3BF tcl8 \u00F3\u00BF -1 {} {Missing second trail byte} + utf-8 F3BF replace \uFFFD -1 {knownW3C} {Missing second trail byte} + utf-8 F3BF strict {} 0 {} {Missing second trail byte} + utf-8 F3807F tcl8 \u00F3\u20AC\x7F -1 {knownBug} {Second trail byte must be 80:BF} + utf-8 F3807F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} + utf-8 F3807F strict {} 0 {} {Second trail byte must be 80:BF} + utf-8 F3BF7F tcl8 \u00F3\u00BF\x7F -1 {} {Second trail byte must be 80:BF} + utf-8 F3BF7F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} + utf-8 F3BF7F strict {} 0 {} {Second trail byte must be 80:BF} + utf-8 F380BF tcl8 \u00F3\u20AC\u00BF -1 {knownBug} {Missing third trail byte} + utf-8 F380BF replace \uFFFD -1 {knownW3C} {Missing third trail byte} + utf-8 F380BF strict {} 0 {} {Missing third trail byte} + utf-8 F3BF81 tcl8 \u00F3\u00BF\u0081 -1 {} {Missing third trail byte} + utf-8 F3BF81 replace \uFFFD -1 {knownW3C} {Missing third trail byte} + utf-8 F3BF81 strict {} 0 {} {Missing third trail byte} + utf-8 F3BF807F tcl8 \u00F3\u00BF\u20AC\x7F -1 {knownBug} {Third trail byte must be 80:BF} + utf-8 F3BF817F replace \uFFFD\x7F -1 {knownW3C} {Third trail byte must be 80:BF} + utf-8 F3BF817F strict {} 0 {} {Third trail byte must be 80:BF} + utf-8 F380BFD0 tcl8 \u00F3\u20AC\u00BF\u00D0 -1 {} {Third trail byte must be 80:BF} + utf-8 F380BFD0 replace \uFFFD -1 {knownW3C} {Third trail byte must be 80:BF} + utf-8 F380BFD0 strict {} 0 {} {Third trail byte must be 80:BF} + + utf-8 F4 tcl8 \u00F4 -1 {} {Missing trail byte} + utf-8 F4 replace \uFFFD -1 {} {Missing trail byte} + utf-8 F4 strict {} 0 {} {Missing trail byte} + utf-8 F47F tcl8 \u00F4\u7F -1 {knownBug} {First trail byte must be 80:8F} + utf-8 F47F replace \uFFFD\u7F -1 {knownW3C} {First trail byte must be 80:8F} + utf-8 F47F strict {} 0 {} {First trail byte must be 80:8F} + utf-8 F490 tcl8 \u00F4\u0090 -1 {knownBug} {First trail byte must be 80:8F} + utf-8 F490 replace \uFFFD\uFFFD -1 {} {First trail byte must be 80:8F} + utf-8 F490 strict {} 0 {} {First trail byte must be 80:8F} + utf-8 F480 tcl8 \u00F4\u0080 -1 {} {Missing second trail byte} + utf-8 F480 replace \uFFFD -1 {knownW3C} {Missing second trail byte} + utf-8 F480 strict {} 0 {} {Missing second trail byte} + utf-8 F48F tcl8 \u00F4\u008F -1 {} {Missing second trail byte} + utf-8 F48F replace \uFFFD -1 {knownW3C} {Missing second trail byte} + utf-8 F48F strict {} 0 {} {Missing second trail byte} + utf-8 F4807F tcl8 \u00F4\u20AC\x7F -1 {knownBug} {Second trail byte must be 80:BF} + utf-8 F4807F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} + utf-8 F4807F strict {} 0 {} {Second trail byte must be 80:BF} + utf-8 F48F7F tcl8 \u00F4\u008F\x7F -1 {} {Second trail byte must be 80:BF} + utf-8 F48F7F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} + utf-8 F48F7F strict {} 0 {} {Second trail byte must be 80:BF} + utf-8 F48081 tcl8 \u00F4\u20AC\u0081 -1 {knownBug} {Missing third trail byte} + utf-8 F48081 replace \uFFFD -1 {knownW3C} {Missing third trail byte} + utf-8 F48081 strict {} 0 {} {Missing third trail byte} + utf-8 F48F81 tcl8 \u00F4\u008F\u0081 -1 {} {Missing third trail byte} + utf-8 F48F81 replace \uFFFD -1 {knownW3C} {Missing third trail byte} + utf-8 F48F81 strict {} 0 {} {Missing third trail byte} + utf-8 F481817F tcl8 \u00F4\u0081\u0081\x7F -1 {knownBug} {Third trail byte must be 80:BF} + utf-8 F480817F replace \uFFFD\x7F -1 {knownW3C} {Third trail byte must be 80:BF} + utf-8 F480817F strict {} 0 {} {Third trail byte must be 80:BF} + utf-8 F48FBFD0 tcl8 \u00F4\u008F\u00BF\u00D0 -1 {} {Third trail byte must be 80:BF} + utf-8 F48FBFD0 replace \uFFFD -1 {knownW3C} {Third trail byte must be 80:BF} + utf-8 F48FBFD0 strict {} 0 {} {Third trail byte must be 80:BF} + + utf-8 F5 tcl8 \u00F5 -1 {} {F5:FF are invalid everywhere} utf-8 F5 replace \uFFFD -1 {} {F5:FF are invalid everywhere} -- cgit v0.12 From 41c5d1cd91756ac3614489931ebe22a4095a6cf9 Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Sat, 18 Feb 2023 17:41:44 +0000 Subject: Minor refactoring/fixes after merge --- generic/tclEncoding.c | 42 ++++++++++-------------------------------- tests/encoding.test | 4 ++-- 2 files changed, 12 insertions(+), 34 deletions(-) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 2095b4c..7e5ec22 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -2452,38 +2452,16 @@ UtfToUtfProc( : TCL_CONVERT_SYNTAX; break; } - if (PROFILE_REPLACE(profile)) { - ch = UNICODE_REPLACE_CHAR; - ++src; - } else { - /* TCL_ENCODING_PROFILE_TCL8 */ - ch = UCHAR(*src); - char chbuf[2]; - chbuf[0] = UCHAR(*src++); chbuf[1] = 0; - TclUtfToUCS4(chbuf, &ch); - } - } - else { - /* - * Incomplete bytes for real UTF-8 target. - * TODO - no profile check here because did not have any - * checks in the pre-profile code. Why? Is it because on - * output a valid internal utf-8 stream is assumed? - */ - char chbuf[2]; - /* - * TODO - this code seems broken to me. - * - it does not check profiles - * - generates invalid output for real UTF-8 target - * (consider \xC2) - * A possible explanation is this behavior matches the - * Tcl8 decoding behavior of mapping invalid bytes to the same - * code point value. Still, at least strictness checks should - * be made. - */ - chbuf[0] = UCHAR(*src++); chbuf[1] = 0; - TclUtfToUCS4(chbuf, &ch); - } + } + if (PROFILE_REPLACE(profile)) { + ch = UNICODE_REPLACE_CHAR; + ++src; + } else { + /* TCL_ENCODING_PROFILE_TCL8 */ + char chbuf[2]; + chbuf[0] = UCHAR(*src++); chbuf[1] = 0; + TclUtfToUCS4(chbuf, &ch); + } dst += Tcl_UniCharToUtf(ch, dst); } else { diff --git a/tests/encoding.test b/tests/encoding.test index 36728d1..7199138 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -848,10 +848,10 @@ test encoding-24.41 {Parse invalid utf-8 with -profile strict} -body { encoding convertfrom -profile strict utf-8 \xED\xA0\x80\xED\xB0\x80 } -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xED'} test encoding-24.42 {Parse invalid utf-8, fallback to cp1252 [885c86a9a0]} -body { - encoding convertfrom -nocomplain utf-8 \xF0\x80\x80\x80 + encoding convertfrom -profile tcl8 utf-8 \xF0\x80\x80\x80 } -result \xF0\u20AC\u20AC\u20AC test encoding-24.43 {Parse invalid utf-8, fallback to cp1252 [885c86a9a0]} -body { - encoding convertfrom -nocomplain utf-8 \x80 + encoding convertfrom -profile tcl8 utf-8 \x80 } -result \u20AC file delete [file join [temporaryDirectory] iso2022.txt] -- cgit v0.12 From 9f595d2fa36d13395f1bfb16559f7519c08e873f Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Sun, 19 Feb 2023 07:40:29 +0000 Subject: Remove knownBug test constraints now that fix has been merged from core-8-branch --- tests/cmdAH.test | 131 +++++++++++++++++++++++++++---------------------------- 1 file changed, 65 insertions(+), 66 deletions(-) diff --git a/tests/cmdAH.test b/tests/cmdAH.test index 11a8188..faa604a 100644 --- a/tests/cmdAH.test +++ b/tests/cmdAH.test @@ -278,41 +278,40 @@ lappend encInvalidBytes {*}{ # (UtfToUtfProc). # Note C0, C1, F5:FF are invalid bytes ANYWHERE. Exception is C080 lappend encInvalidBytes {*}{ - utf-8 80 tcl8 \u20AC -1 {knownBug} {map to cp1252} - utf-8 80 tcl8 \u20AC -1 {knownBug} {map to cp1252} + utf-8 80 tcl8 \u20AC -1 {} {map to cp1252} utf-8 80 replace \uFFFD -1 {} {Smallest invalid byte} utf-8 80 strict {} 0 {} {Smallest invalid byte} - utf-8 81 tcl8 \u0081 -1 {knownBug} {map to cp1252} - utf-8 82 tcl8 \u201A -1 {knownBug} {map to cp1252} - utf-8 83 tcl8 \u0192 -1 {knownBug} {map to cp1252} - utf-8 84 tcl8 \u201E -1 {knownBug} {map to cp1252} - utf-8 85 tcl8 \u2026 -1 {knownBug} {map to cp1252} - utf-8 86 tcl8 \u2020 -1 {knownBug} {map to cp1252} - utf-8 87 tcl8 \u2021 -1 {knownBug} {map to cp1252} - utf-8 88 tcl8 \u0276 -1 {knownBug} {map to cp1252} - utf-8 89 tcl8 \u2030 -1 {knownBug} {map to cp1252} - utf-8 8A tcl8 \u0160 -1 {knownBug} {map to cp1252} - utf-8 8B tcl8 \u2039 -1 {knownBug} {map to cp1252} - utf-8 8C tcl8 \u0152 -1 {knownBug} {map to cp1252} - utf-8 8D tcl8 \u008D -1 {knownBug} {map to cp1252} - utf-8 8E tcl8 \u017D -1 {knownBug} {map to cp1252} - utf-8 8F tcl8 \u008F -1 {knownBug} {map to cp1252} - utf-8 90 tcl8 \u0090 -1 {knownBug} {map to cp1252} - utf-8 91 tcl8 \u2018 -1 {knownBug} {map to cp1252} - utf-8 92 tcl8 \u2019 -1 {knownBug} {map to cp1252} - utf-8 93 tcl8 \u201C -1 {knownBug} {map to cp1252} - utf-8 94 tcl8 \u201D -1 {knownBug} {map to cp1252} - utf-8 95 tcl8 \u2022 -1 {knownBug} {map to cp1252} - utf-8 96 tcl8 \u2013 -1 {knownBug} {map to cp1252} - utf-8 97 tcl8 \u2014 -1 {knownBug} {map to cp1252} - utf-8 98 tcl8 \u02DC -1 {knownBug} {map to cp1252} - utf-8 99 tcl8 \u2122 -1 {knownBug} {map to cp1252} - utf-8 9A tcl8 \u0161 -1 {knownBug} {map to cp1252} - utf-8 9B tcl8 \u203A -1 {knownBug} {map to cp1252} - utf-8 9C tcl8 \u0153 -1 {knownBug} {map to cp1252} - utf-8 9D tcl8 \u009D -1 {knownBug} {map to cp1252} - utf-8 9E tcl8 \u017E -1 {knownBug} {map to cp1252} - utf-8 9F tcl8 \u0178 -1 {knownBug} {map to cp1252} + utf-8 81 tcl8 \u0081 -1 {} {map to cp1252} + utf-8 82 tcl8 \u201A -1 {} {map to cp1252} + utf-8 83 tcl8 \u0192 -1 {} {map to cp1252} + utf-8 84 tcl8 \u201E -1 {} {map to cp1252} + utf-8 85 tcl8 \u2026 -1 {} {map to cp1252} + utf-8 86 tcl8 \u2020 -1 {} {map to cp1252} + utf-8 87 tcl8 \u2021 -1 {} {map to cp1252} + utf-8 88 tcl8 \u02C6 -1 {} {map to cp1252} + utf-8 89 tcl8 \u2030 -1 {} {map to cp1252} + utf-8 8A tcl8 \u0160 -1 {} {map to cp1252} + utf-8 8B tcl8 \u2039 -1 {} {map to cp1252} + utf-8 8C tcl8 \u0152 -1 {} {map to cp1252} + utf-8 8D tcl8 \u008D -1 {} {map to cp1252} + utf-8 8E tcl8 \u017D -1 {} {map to cp1252} + utf-8 8F tcl8 \u008F -1 {} {map to cp1252} + utf-8 90 tcl8 \u0090 -1 {} {map to cp1252} + utf-8 91 tcl8 \u2018 -1 {} {map to cp1252} + utf-8 92 tcl8 \u2019 -1 {} {map to cp1252} + utf-8 93 tcl8 \u201C -1 {} {map to cp1252} + utf-8 94 tcl8 \u201D -1 {} {map to cp1252} + utf-8 95 tcl8 \u2022 -1 {} {map to cp1252} + utf-8 96 tcl8 \u2013 -1 {} {map to cp1252} + utf-8 97 tcl8 \u2014 -1 {} {map to cp1252} + utf-8 98 tcl8 \u02DC -1 {} {map to cp1252} + utf-8 99 tcl8 \u2122 -1 {} {map to cp1252} + utf-8 9A tcl8 \u0161 -1 {} {map to cp1252} + utf-8 9B tcl8 \u203A -1 {} {map to cp1252} + utf-8 9C tcl8 \u0153 -1 {} {map to cp1252} + utf-8 9D tcl8 \u009D -1 {} {map to cp1252} + utf-8 9E tcl8 \u017E -1 {} {map to cp1252} + utf-8 9F tcl8 \u0178 -1 {} {map to cp1252} utf-8 C0 tcl8 \u00C0 -1 {} {C0 is invalid anywhere} utf-8 C0 strict {} 0 {} {C0 is invalid anywhere} @@ -343,10 +342,10 @@ lappend encInvalidBytes {*}{ utf-8 E0 tcl8 \u00E0 -1 {} {Missing trail byte} utf-8 E0 replace \uFFFD -1 {} {Missing trail byte} utf-8 E0 strict {} 0 {} {Missing trail byte} - utf-8 E080 tcl8 \u00E0\u20AC -1 {knownBug} {First trail byte must be A0:BF} + utf-8 E080 tcl8 \u00E0\u20AC -1 {} {First trail byte must be A0:BF} utf-8 E080 replace \uFFFD\uFFFD -1 {} {First trail byte must be A0:BF} utf-8 E080 strict {} 0 {} {First trail byte must be A0:BF} - utf-8 E09F tcl8 \u00E0\u0178 -1 {knownBug} {First trail byte must be A0:BF} + utf-8 E09F tcl8 \u00E0\u0178 -1 {} {First trail byte must be A0:BF} utf-8 E09F replace \uFFFD\uFFFD -1 {} {First trail byte must be A0:BF} utf-8 E09F strict {} 0 {} {First trail byte must be A0:BF} utf-8 E0A0 tcl8 \u00E0\u00A0 -1 {} {Missing second trail byte} @@ -374,7 +373,7 @@ lappend encInvalidBytes {*}{ utf-8 E1BF tcl8 \u00E1\u00BF -1 {} {Missing second trail byte} utf-8 E1BF replace \uFFFD -1 {knownW3C} {Missing second trail byte} utf-8 E1BF strict {} 0 {} {Missing second trail byte} - utf-8 E1807F tcl8 \u00E1\u20AC\x7F -1 {knownBug} {Second trail byte must be 80:BF} + utf-8 E1807F tcl8 \u00E1\u20AC\x7F -1 {} {Second trail byte must be 80:BF} utf-8 E1807F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} utf-8 E1807F strict {} 0 {} {Second trail byte must be 80:BF} utf-8 E1BF7F tcl8 \u00E1\u00BF\x7F -1 {} {Second trail byte must be 80:BF} @@ -392,7 +391,7 @@ lappend encInvalidBytes {*}{ utf-8 ECBF tcl8 \u00EC\u00BF -1 {} {Missing second trail byte} utf-8 ECBF replace \uFFFD -1 {knownW3C} {Missing second trail byte} utf-8 ECBF strict {} 0 {} {Missing second trail byte} - utf-8 EC807F tcl8 \u00EC\u20AC\x7F -1 {knownBug} {Second trail byte must be 80:BF} + utf-8 EC807F tcl8 \u00EC\u20AC\x7F -1 {} {Second trail byte must be 80:BF} utf-8 EC807F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} utf-8 EC807F strict {} 0 {} {Second trail byte must be 80:BF} utf-8 ECBF7F tcl8 \u00EC\u00BF\x7F -1 {} {Second trail byte must be 80:BF} @@ -402,10 +401,10 @@ lappend encInvalidBytes {*}{ utf-8 ED tcl8 \u00ED -1 {} {Missing trail byte} utf-8 ED replace \uFFFD -1 {} {Missing trail byte} utf-8 ED strict {} 0 {} {Missing trail byte} - utf-8 ED7F tcl8 \u00ED\u7F -1 {knownBug} {First trail byte must be 80:9F} + utf-8 ED7F tcl8 \u00ED\u7F -1 {} {First trail byte must be 80:9F} utf-8 ED7F replace \uFFFD\u7F -1 {} {First trail byte must be 80:9F} utf-8 ED7F strict {} 0 {} {First trail byte must be 80:9F} - utf-8 EDA0 tcl8 \u00ED\u00A0 -1 {knownBug} {First trail byte must be 80:9F} + utf-8 EDA0 tcl8 \u00ED\u00A0 -1 {} {First trail byte must be 80:9F} utf-8 EDA0 replace \uFFFD\uFFFD -1 {} {First trail byte must be 80:9F} utf-8 EDA0 strict {} 0 {} {First trail byte must be 80:9F} utf-8 ED81 tcl8 \u00ED\u0081 -1 {} {Missing second trail byte} @@ -414,10 +413,10 @@ lappend encInvalidBytes {*}{ utf-8 EDBF tcl8 \u00ED\u00BF -1 {} {Missing second trail byte} utf-8 EDBF replace \uFFFD -1 {knownW3C} {Missing second trail byte} utf-8 EDBF strict {} 0 {} {Missing second trail byte} - utf-8 ED807F tcl8 \u00ED\u20AC\x7F -1 {knownBug} {Second trail byte must be 80:BF} + utf-8 ED807F tcl8 \u00ED\u20AC\x7F -1 {} {Second trail byte must be 80:BF} utf-8 ED807F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} utf-8 ED807F strict {} 0 {} {Second trail byte must be 80:BF} - utf-8 ED9F7F tcl8 \u00ED\u0178\x7F -1 {knownBug} {Second trail byte must be 80:BF} + utf-8 ED9F7F tcl8 \u00ED\u0178\x7F -1 {} {Second trail byte must be 80:BF} utf-8 ED9F7F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} utf-8 ED9F7F strict {} 0 {} {Second trail byte must be 80:BF} utf-8 EDA080 tcl8 \uD800 -1 {} {High surrogate} @@ -442,10 +441,10 @@ lappend encInvalidBytes {*}{ utf-8 EE tcl8 \u00EE -1 {} {Missing trail byte} utf-8 EE replace \uFFFD -1 {} {Missing trail byte} utf-8 EE strict {} 0 {} {Missing trail byte} - utf-8 EE7F tcl8 \u00EE\u7F -1 {knownBug} {First trail byte must be 80:BF} + utf-8 EE7F tcl8 \u00EE\u7F -1 {} {First trail byte must be 80:BF} utf-8 EE7F replace \uFFFD\u7F -1 {} {First trail byte must be 80:BF} utf-8 EE7F strict {} 0 {} {First trail byte must be 80:BF} - utf-8 EED0 tcl8 \u00EE\u00D0 -1 {knownBug} {First trail byte must be 80:BF} + utf-8 EED0 tcl8 \u00EE\u00D0 -1 {} {First trail byte must be 80:BF} utf-8 EED0 replace \uFFFD\uFFFD -1 {} {First trail byte must be 80:BF} utf-8 EED0 strict {} 0 {} {First trail byte must be 80:BF} utf-8 EE81 tcl8 \u00EE\u0081 -1 {} {Missing second trail byte} @@ -454,7 +453,7 @@ lappend encInvalidBytes {*}{ utf-8 EEBF tcl8 \u00EE\u00BF -1 {} {Missing second trail byte} utf-8 EEBF replace \uFFFD -1 {knownW3C} {Missing second trail byte} utf-8 EEBF strict {} 0 {} {Missing second trail byte} - utf-8 EE807F tcl8 \u00EE\u20AC\x7F -1 {knownBug} {Second trail byte must be 80:BF} + utf-8 EE807F tcl8 \u00EE\u20AC\x7F -1 {} {Second trail byte must be 80:BF} utf-8 EE807F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} utf-8 EE807F strict {} 0 {} {Second trail byte must be 80:BF} utf-8 EEBF7F tcl8 \u00EE\u00BF\x7F -1 {} {Second trail byte must be 80:BF} @@ -463,10 +462,10 @@ lappend encInvalidBytes {*}{ utf-8 EF tcl8 \u00EF -1 {} {Missing trail byte} utf-8 EF replace \uFFFD -1 {} {Missing trail byte} utf-8 EF strict {} 0 {} {Missing trail byte} - utf-8 EF7F tcl8 \u00EF\u7F -1 {knownBug} {First trail byte must be 80:BF} + utf-8 EF7F tcl8 \u00EF\u7F -1 {} {First trail byte must be 80:BF} utf-8 EF7F replace \uFFFD\u7F -1 {} {First trail byte must be 80:BF} utf-8 EF7F strict {} 0 {} {First trail byte must be 80:BF} - utf-8 EFD0 tcl8 \u00EF\u00D0 -1 {knownBug} {First trail byte must be 80:BF} + utf-8 EFD0 tcl8 \u00EF\u00D0 -1 {} {First trail byte must be 80:BF} utf-8 EFD0 replace \uFFFD\uFFFD -1 {} {First trail byte must be 80:BF} utf-8 EFD0 strict {} 0 {} {First trail byte must be 80:BF} utf-8 EF81 tcl8 \u00EF\u0081 -1 {} {Missing second trail byte} @@ -475,7 +474,7 @@ lappend encInvalidBytes {*}{ utf-8 EFBF tcl8 \u00EF\u00BF -1 {} {Missing second trail byte} utf-8 EFBF replace \uFFFD -1 {knownW3C} {Missing second trail byte} utf-8 EFBF strict {} 0 {} {Missing second trail byte} - utf-8 EF807F tcl8 \u00EF\u20AC\x7F -1 {knownBug} {Second trail byte must be 80:BF} + utf-8 EF807F tcl8 \u00EF\u20AC\x7F -1 {} {Second trail byte must be 80:BF} utf-8 EF807F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} utf-8 EF807F strict {} 0 {} {Second trail byte must be 80:BF} utf-8 EFBF7F tcl8 \u00EF\u00BF\x7F -1 {} {Second trail byte must be 80:BF} @@ -485,10 +484,10 @@ lappend encInvalidBytes {*}{ utf-8 F0 tcl8 \u00F0 -1 {} {Missing trail byte} utf-8 F0 replace \uFFFD -1 {} {Missing trail byte} utf-8 F0 strict {} 0 {} {Missing trail byte} - utf-8 F08F tcl8 \u00F0\u8F -1 {knownBug} {First trail byte must be 90:BF} + utf-8 F08F tcl8 \u00F0\u8F -1 {} {First trail byte must be 90:BF} utf-8 F08F replace \uFFFD -1 {knownW3C} {First trail byte must be 90:BF} utf-8 F08F strict {} 0 {} {First trail byte must be 90:BF} - utf-8 F0D0 tcl8 \u00F0\u00D0 -1 {knownBug} {First trail byte must be 90:BF} + utf-8 F0D0 tcl8 \u00F0\u00D0 -1 {} {First trail byte must be 90:BF} utf-8 F0D0 replace \uFFFD\uFFFD -1 {} {First trail byte must be 90:BF} utf-8 F0D0 strict {} 0 {} {First trail byte must be 90:BF} utf-8 F090 tcl8 \u00F0\u0090 -1 {} {Missing second trail byte} @@ -497,7 +496,7 @@ lappend encInvalidBytes {*}{ utf-8 F0BF tcl8 \u00F0\u00BF -1 {} {Missing second trail byte} utf-8 F0BF replace \uFFFD -1 {knownW3C} {Missing second trail byte} utf-8 F0BF strict {} 0 {} {Missing second trail byte} - utf-8 F0907F tcl8 \u00F0\u0090\x7F -1 {knownBug} {Second trail byte must be 80:BF} + utf-8 F0907F tcl8 \u00F0\u0090\x7F -1 {} {Second trail byte must be 80:BF} utf-8 F0907F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} utf-8 F0907F strict {} 0 {} {Second trail byte must be 80:BF} utf-8 F0BF7F tcl8 \u00F0\u00BF\x7F -1 {} {Second trail byte must be 80:BF} @@ -509,7 +508,7 @@ lappend encInvalidBytes {*}{ utf-8 F0BF81 tcl8 \u00F0\u00BF\u0081 -1 {} {Missing third trail byte} utf-8 F0BF81 replace \uFFFD -1 {knownW3C} {Missing third trail byte} utf-8 F0BF81 strict {} 0 {} {Missing third trail byte} - utf-8 F0BF807F tcl8 \u00F0\u00BF\u20AC\x7F -1 {knownBug} {Third trail byte must be 80:BF} + utf-8 F0BF807F tcl8 \u00F0\u00BF\u20AC\x7F -1 {} {Third trail byte must be 80:BF} utf-8 F0BF817F replace \uFFFD\x7F -1 {knownW3C} {Third trail byte must be 80:BF} utf-8 F0BF817F strict {} 0 {} {Third trail byte must be 80:BF} utf-8 F090BFD0 tcl8 \u00F0\u0090\u00BF\u00D0 -1 {} {Third trail byte must be 80:BF} @@ -519,10 +518,10 @@ lappend encInvalidBytes {*}{ utf-8 F1 tcl8 \u00F1 -1 {} {Missing trail byte} utf-8 F1 replace \uFFFD -1 {} {Missing trail byte} utf-8 F1 strict {} 0 {} {Missing trail byte} - utf-8 F17F tcl8 \u00F1\u8F -1 {knownBug} {First trail byte must be 80:BF} + utf-8 F17F tcl8 \u00F1\u7F -1 {} {First trail byte must be 80:BF} utf-8 F17F replace \uFFFD -1 {knownW3C} {First trail byte must be 80:BF} utf-8 F17F strict {} 0 {} {First trail byte must be 80:BF} - utf-8 F1D0 tcl8 \u00F1\u00D0 -1 {knownBug} {First trail byte must be 80:BF} + utf-8 F1D0 tcl8 \u00F1\u00D0 -1 {} {First trail byte must be 80:BF} utf-8 F1D0 replace \uFFFD\uFFFD -1 {} {First trail byte must be 80:BF} utf-8 F1D0 strict {} 0 {} {First trail byte must be 80:BF} utf-8 F180 tcl8 \u00F1\u20AC -1 {} {Missing second trail byte} @@ -531,19 +530,19 @@ lappend encInvalidBytes {*}{ utf-8 F1BF tcl8 \u00F1\u00BF -1 {} {Missing second trail byte} utf-8 F1BF replace \uFFFD -1 {knownW3C} {Missing second trail byte} utf-8 F1BF strict {} 0 {} {Missing second trail byte} - utf-8 F1807F tcl8 \u00F1\u20AC\x7F -1 {knownBug} {Second trail byte must be 80:BF} + utf-8 F1807F tcl8 \u00F1\u20AC\x7F -1 {} {Second trail byte must be 80:BF} utf-8 F1807F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} utf-8 F1807F strict {} 0 {} {Second trail byte must be 80:BF} utf-8 F1BF7F tcl8 \u00F1\u00BF\x7F -1 {} {Second trail byte must be 80:BF} utf-8 F1BF7F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} utf-8 F1BF7F strict {} 0 {} {Second trail byte must be 80:BF} - utf-8 F180BF tcl8 \u00F1\u20AC\u00BF -1 {knownBug} {Missing third trail byte} + utf-8 F180BF tcl8 \u00F1\u20AC\u00BF -1 {} {Missing third trail byte} utf-8 F180BF replace \uFFFD -1 {knownW3C} {Missing third trail byte} utf-8 F180BF strict {} 0 {} {Missing third trail byte} utf-8 F1BF81 tcl8 \u00F1\u00BF\u0081 -1 {} {Missing third trail byte} utf-8 F1BF81 replace \uFFFD -1 {knownW3C} {Missing third trail byte} utf-8 F1BF81 strict {} 0 {} {Missing third trail byte} - utf-8 F1BF807F tcl8 \u00F1\u00BF\u20AC\x7F -1 {knownBug} {Third trail byte must be 80:BF} + utf-8 F1BF807F tcl8 \u00F1\u00BF\u20AC\x7F -1 {} {Third trail byte must be 80:BF} utf-8 F1BF817F replace \uFFFD\x7F -1 {knownW3C} {Third trail byte must be 80:BF} utf-8 F1BF817F strict {} 0 {} {Third trail byte must be 80:BF} utf-8 F180BFD0 tcl8 \u00F1\u20AC\u00BF\u00D0 -1 {} {Third trail byte must be 80:BF} @@ -552,10 +551,10 @@ lappend encInvalidBytes {*}{ utf-8 F3 tcl8 \u00F3 -1 {} {Missing trail byte} utf-8 F3 replace \uFFFD -1 {} {Missing trail byte} utf-8 F3 strict {} 0 {} {Missing trail byte} - utf-8 F37F tcl8 \u00F3\u8F -1 {knownBug} {First trail byte must be 80:BF} + utf-8 F37F tcl8 \u00F3\x7F -1 {} {First trail byte must be 80:BF} utf-8 F37F replace \uFFFD -1 {knownW3C} {First trail byte must be 80:BF} utf-8 F37F strict {} 0 {} {First trail byte must be 80:BF} - utf-8 F3D0 tcl8 \u00F3\u00D0 -1 {knownBug} {First trail byte must be 80:BF} + utf-8 F3D0 tcl8 \u00F3\u00D0 -1 {} {First trail byte must be 80:BF} utf-8 F3D0 replace \uFFFD\uFFFD -1 {} {First trail byte must be 80:BF} utf-8 F3D0 strict {} 0 {} {First trail byte must be 80:BF} utf-8 F380 tcl8 \u00F3\u20AC -1 {} {Missing second trail byte} @@ -564,19 +563,19 @@ lappend encInvalidBytes {*}{ utf-8 F3BF tcl8 \u00F3\u00BF -1 {} {Missing second trail byte} utf-8 F3BF replace \uFFFD -1 {knownW3C} {Missing second trail byte} utf-8 F3BF strict {} 0 {} {Missing second trail byte} - utf-8 F3807F tcl8 \u00F3\u20AC\x7F -1 {knownBug} {Second trail byte must be 80:BF} + utf-8 F3807F tcl8 \u00F3\u20AC\x7F -1 {} {Second trail byte must be 80:BF} utf-8 F3807F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} utf-8 F3807F strict {} 0 {} {Second trail byte must be 80:BF} utf-8 F3BF7F tcl8 \u00F3\u00BF\x7F -1 {} {Second trail byte must be 80:BF} utf-8 F3BF7F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} utf-8 F3BF7F strict {} 0 {} {Second trail byte must be 80:BF} - utf-8 F380BF tcl8 \u00F3\u20AC\u00BF -1 {knownBug} {Missing third trail byte} + utf-8 F380BF tcl8 \u00F3\u20AC\u00BF -1 {} {Missing third trail byte} utf-8 F380BF replace \uFFFD -1 {knownW3C} {Missing third trail byte} utf-8 F380BF strict {} 0 {} {Missing third trail byte} utf-8 F3BF81 tcl8 \u00F3\u00BF\u0081 -1 {} {Missing third trail byte} utf-8 F3BF81 replace \uFFFD -1 {knownW3C} {Missing third trail byte} utf-8 F3BF81 strict {} 0 {} {Missing third trail byte} - utf-8 F3BF807F tcl8 \u00F3\u00BF\u20AC\x7F -1 {knownBug} {Third trail byte must be 80:BF} + utf-8 F3BF807F tcl8 \u00F3\u00BF\u20AC\x7F -1 {} {Third trail byte must be 80:BF} utf-8 F3BF817F replace \uFFFD\x7F -1 {knownW3C} {Third trail byte must be 80:BF} utf-8 F3BF817F strict {} 0 {} {Third trail byte must be 80:BF} utf-8 F380BFD0 tcl8 \u00F3\u20AC\u00BF\u00D0 -1 {} {Third trail byte must be 80:BF} @@ -586,10 +585,10 @@ lappend encInvalidBytes {*}{ utf-8 F4 tcl8 \u00F4 -1 {} {Missing trail byte} utf-8 F4 replace \uFFFD -1 {} {Missing trail byte} utf-8 F4 strict {} 0 {} {Missing trail byte} - utf-8 F47F tcl8 \u00F4\u7F -1 {knownBug} {First trail byte must be 80:8F} + utf-8 F47F tcl8 \u00F4\u7F -1 {} {First trail byte must be 80:8F} utf-8 F47F replace \uFFFD\u7F -1 {knownW3C} {First trail byte must be 80:8F} utf-8 F47F strict {} 0 {} {First trail byte must be 80:8F} - utf-8 F490 tcl8 \u00F4\u0090 -1 {knownBug} {First trail byte must be 80:8F} + utf-8 F490 tcl8 \u00F4\u0090 -1 {} {First trail byte must be 80:8F} utf-8 F490 replace \uFFFD\uFFFD -1 {} {First trail byte must be 80:8F} utf-8 F490 strict {} 0 {} {First trail byte must be 80:8F} utf-8 F480 tcl8 \u00F4\u20AC -1 {} {Missing second trail byte} @@ -598,19 +597,19 @@ lappend encInvalidBytes {*}{ utf-8 F48F tcl8 \u00F4\u008F -1 {} {Missing second trail byte} utf-8 F48F replace \uFFFD -1 {knownW3C} {Missing second trail byte} utf-8 F48F strict {} 0 {} {Missing second trail byte} - utf-8 F4807F tcl8 \u00F4\u20AC\x7F -1 {knownBug} {Second trail byte must be 80:BF} + utf-8 F4807F tcl8 \u00F4\u20AC\x7F -1 {} {Second trail byte must be 80:BF} utf-8 F4807F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} utf-8 F4807F strict {} 0 {} {Second trail byte must be 80:BF} utf-8 F48F7F tcl8 \u00F4\u008F\x7F -1 {} {Second trail byte must be 80:BF} utf-8 F48F7F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} utf-8 F48F7F strict {} 0 {} {Second trail byte must be 80:BF} - utf-8 F48081 tcl8 \u00F4\u20AC\u0081 -1 {knownBug} {Missing third trail byte} + utf-8 F48081 tcl8 \u00F4\u20AC\u0081 -1 {} {Missing third trail byte} utf-8 F48081 replace \uFFFD -1 {knownW3C} {Missing third trail byte} utf-8 F48081 strict {} 0 {} {Missing third trail byte} utf-8 F48F81 tcl8 \u00F4\u008F\u0081 -1 {} {Missing third trail byte} utf-8 F48F81 replace \uFFFD -1 {knownW3C} {Missing third trail byte} utf-8 F48F81 strict {} 0 {} {Missing third trail byte} - utf-8 F481817F tcl8 \u00F4\u0081\u0081\x7F -1 {knownBug} {Third trail byte must be 80:BF} + utf-8 F481817F tcl8 \u00F4\u0081\u0081\x7F -1 {} {Third trail byte must be 80:BF} utf-8 F480817F replace \uFFFD\x7F -1 {knownW3C} {Third trail byte must be 80:BF} utf-8 F480817F strict {} 0 {} {Third trail byte must be 80:BF} utf-8 F48FBFD0 tcl8 \u00F4\u008F\u00BF\u00D0 -1 {} {Third trail byte must be 80:BF} -- cgit v0.12 From 23d9ca0ec4772f703cd24c476d5fa485fd91e828 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Sun, 19 Feb 2023 11:41:44 +0000 Subject: Proposed fix for [5607d6482c]: strict ucs-2 never implemented (TIP #346/#656) --- generic/tclEncoding.c | 36 ++++++++++++++++++++++++------------ tests/encoding.test | 8 +++++++- 2 files changed, 31 insertions(+), 13 deletions(-) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 1d3a3eb..d2b0efc 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -573,13 +573,13 @@ TclInitEncodingSubsystem(void) type.freeProc = NULL; type.nullSize = 2; type.encodingName = "ucs-2le"; - type.clientData = INT2PTR(TCL_ENCODING_LE|TCL_ENCODING_NOCOMPLAIN); + type.clientData = INT2PTR(TCL_ENCODING_LE); Tcl_CreateEncoding(&type); type.encodingName = "ucs-2be"; - type.clientData = INT2PTR(TCL_ENCODING_NOCOMPLAIN); + type.clientData = INT2PTR(0); Tcl_CreateEncoding(&type); type.encodingName = "ucs-2"; - type.clientData = INT2PTR(isLe.c|TCL_ENCODING_NOCOMPLAIN); + type.clientData = INT2PTR(isLe.c); Tcl_CreateEncoding(&type); type.toUtfProc = Utf32ToUtfProc; @@ -601,13 +601,13 @@ TclInitEncodingSubsystem(void) type.freeProc = NULL; type.nullSize = 2; type.encodingName = "utf-16le"; - type.clientData = INT2PTR(TCL_ENCODING_LE); + type.clientData = INT2PTR(TCL_ENCODING_LE|ENCODING_UTF); Tcl_CreateEncoding(&type); type.encodingName = "utf-16be"; - type.clientData = INT2PTR(0); + type.clientData = INT2PTR(ENCODING_UTF); Tcl_CreateEncoding(&type); type.encodingName = "utf-16"; - type.clientData = INT2PTR(isLe.c); + type.clientData = INT2PTR(isLe.c|ENCODING_UTF); Tcl_CreateEncoding(&type); #ifndef TCL_NO_DEPRECATED @@ -2984,10 +2984,7 @@ UtfToUcs2Proc( * output buffer. */ { const char *srcStart, *srcEnd, *srcClose, *dstStart, *dstEnd; - int result, numChars; -#if TCL_UTF_MAX < 4 - int len; -#endif + int result, numChars, len; Tcl_UniChar ch = 0; flags |= PTR2INT(clientData); @@ -3017,17 +3014,32 @@ UtfToUcs2Proc( break; } #if TCL_UTF_MAX < 4 - src += (len = TclUtfToUniChar(src, &ch)); + len = TclUtfToUniChar(src, &ch); if ((ch >= 0xD800) && (len < 3)) { + if (STOPONERROR) { + result = TCL_CONVERT_UNKNOWN; + break; + } + src += len; src += TclUtfToUniChar(src, &ch); ch = 0xFFFD; } #else - src += TclUtfToUniChar(src, &ch); + len = TclUtfToUniChar(src, &ch); if (ch > 0xFFFF) { + if (STOPONERROR) { + result = TCL_CONVERT_UNKNOWN; + break; + } ch = 0xFFFD; } #endif + if (STOPONERROR && ((ch & ~0x7FF) == 0xD800)) { + result = TCL_CONVERT_SYNTAX; + break; + } + + src += len; /* * Need to handle this in a way that won't cause misalignment by diff --git a/tests/encoding.test b/tests/encoding.test index 03f0273..83e75be 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -561,7 +561,7 @@ test encoding-16.9 { test encoding-17.1 {UtfToUtf16Proc} -body { encoding convertto utf-16 "\U460DC" } -result "\xD8\xD8\xDC\xDC" -test encoding-17.2 {UtfToUcs2Proc} -body { +test encoding-17.2 {UtfToUcs2Proc, invalid testcase, see [5607d6482c]} -constraints deprecated -body { encoding convertfrom utf-16 [encoding convertto ucs-2 "\U460DC"] } -result "\uFFFD" test encoding-17.3 {UtfToUtf16Proc} -body { @@ -853,6 +853,12 @@ test encoding-24.42 {Parse invalid utf-8, fallback to cp1252 [885c86a9a0]} -body test encoding-24.43 {Parse invalid utf-8, fallback to cp1252 [885c86a9a0]} -body { encoding convertfrom -nocomplain utf-8 \x80 } -result \u20AC +test encoding-24.44 {Try to generate invalid ucs-2 with -strict} -body { + encoding convertto -strict ucs-2 \uD800 +} -returnCodes 1 -result {unexpected character at index 0: 'U+00D800'} +test encoding-24.45 {Try to generate invalid ucs-2 with -strict} -body { + encoding convertto -strict ucs-2 \U10000 +} -returnCodes 1 -result {unexpected character at index 0: 'U+010000'} file delete [file join [temporaryDirectory] iso2022.txt] -- cgit v0.12 From 41af9f9e84d0b6cee2116ff08e297db05786e6ce Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Mon, 20 Feb 2023 15:08:58 +0000 Subject: Add UTF16 and UTF32 tests --- tests/cmdAH.test | 193 +++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 137 insertions(+), 56 deletions(-) diff --git a/tests/cmdAH.test b/tests/cmdAH.test index faa604a..1fbe6d2 100644 --- a/tests/cmdAH.test +++ b/tests/cmdAH.test @@ -185,15 +185,58 @@ set encDefaultProfile tcl8; # Should reflect the default from implementation # TODO - valid sequences for different encodings - shiftjis etc. # Note utf-16, utf-32 missing because they are automatically -# generated based on le/be versions. Also add all ranges from Unicode standard -# Table 3.7 +# generated based on le/be versions. set encValidStrings { - ascii ABC 414243 - utf-8 A\u0000\u03A9\u8A9E\U00010384 4100CEA9E8AA9EF0908E84 - utf-16le A\u0000\u03A9\u8A9E\U00010384 41000000A9039E8A00D884DF - utf-16be A\u0000\u03A9\u8A9E\U00010384 0041000003A98A9ED800DF84 - utf-32le A\u0000\u03A9\u8A9E\U00010384 4100000000000000A90300009E8A000084030100 - utf-32be A\u0000\u03A9\u8A9E\U00010384 0000004100000000000003A900008A9E00010384 + ascii \u0000 00 {} {Lowest ASCII} + ascii \u007F 7F knownBug {Highest ASCII} + + utf-8 \u0000 00 {} {Unicode Table 3.7 Row 1} + utf-8 \u007F 7F {} {Unicode Table 3.7 Row 1} + utf-8 \u0080 C280 {} {Unicode Table 3.7 Row 2} + utf-8 \u07FF DFBF {} {Unicode Table 3.7 Row 2} + utf-8 \u0800 E0A080 {} {Unicode Table 3.7 Row 3} + utf-8 \u0FFF E0BFBF {} {Unicode Table 3.7 Row 3} + utf-8 \u1000 E18080 {} {Unicode Table 3.7 Row 4} + utf-8 \uCFFF ECBFBF {} {Unicode Table 3.7 Row 4} + utf-8 \uD000 ED8080 {} {Unicode Table 3.7 Row 5} + utf-8 \uD7FF ED9FBF {} {Unicode Table 3.7 Row 5} + utf-8 \uE000 EE8080 {} {Unicode Table 3.7 Row 6} + utf-8 \uFFFF EFBFBF {} {Unicode Table 3.7 Row 6} + utf-8 \U10000 F0908080 {} {Unicode Table 3.7 Row 7} + utf-8 \U3FFFF F0BFBFBF {} {Unicode Table 3.7 Row 7} + utf-8 \U40000 F1808080 {} {Unicode Table 3.7 Row 8} + utf-8 \UFFFFF F3BFBFBF {} {Unicode Table 3.7 Row 8} + utf-8 \U100000 F4808080 {} {Unicode Table 3.7 Row 9} + utf-8 \U10FFFF F48FBFBF {} {Unicode Table 3.7 Row 9} + utf-8 A\u03A9\u8A9E\U00010384 41CEA9E8AA9EF0908E84 {} {Unicode 2.5} + + utf-16le \u0000 0000 {} {Lowest code unit} + utf-16le \uD7FF FFD7 {} {Below high surrogate range} + utf-16le \uE000 00E0 {} {Above low surrogate range} + utf-16le \uFFFF FFFF {} {Highest code unit} + utf-16le \U010000 00D800DC {} {First surrogate pair} + utf-16le \U10FFFF FFDBFFDF {} {First surrogate pair} + utf-16le A\u03A9\u8A9E\U00010384 4100A9039E8A00D884DF {} {Unicode 2.5} + + utf-16be \u0000 0000 {} {Lowest code unit} + utf-16be \uD7FF D7FF {} {Below high surrogate range} + utf-16be \uE000 E000 {} {Above low surrogate range} + utf-16be \uFFFF FFFF {} {Highest code unit} + utf-16be \U010000 D800DC00 {} {First surrogate pair} + utf-16be \U10FFFF DBFFDFFF {} {First surrogate pair} + utf-16be A\u03A9\u8A9E\U00010384 004103A98A9ED800DF84 {} {Unicode 2.5} + + utf-32le \u0000 00000000 {} {Lowest code unit} + utf-32le \uFFFF FFFF0000 {} {Highest BMP} + utf-32le \U010000 00000100 {} {First supplementary} + utf-32le \U10FFFF ffff1000 {} {Last supplementary} + utf-32le A\u03A9\u8A9E\U00010384 41000000A90300009E8A000084030100 {} {Unicode 2.5} + + utf-32be \u0000 00000000 {} {Lowest code unit} + utf-32be \uFFFF 0000FFFF {} {Highest BMP} + utf-32be \U010000 00010000 {} {First supplementary} + utf-32be \U10FFFF 0010FFFF {} {Last supplementary} + utf-32be A\u03A9\u8A9E\U00010384 00000041000003A900008A9E00010384 {} {Unicode 2.5} } # Invalid byte sequences. These are driven from a table with format @@ -211,8 +254,7 @@ set encValidStrings { # If the ctrl field is empty it is treated as all of the above # Note if there is any other value by itself, it will cause the test to # be skipped. This is intentional to skip known bugs. - -# TODO - other encodings and test cases +# TODO - non-UTF encodings # ascii - Any byte above 127 is invalid and is mapped # to the same numeric code point except for the range @@ -616,8 +658,6 @@ lappend encInvalidBytes {*}{ utf-8 F48FBFD0 replace \uFFFD -1 {knownW3C} {Third trail byte must be 80:BF} utf-8 F48FBFD0 strict {} 0 {} {Third trail byte must be 80:BF} - - utf-8 F5 tcl8 \u00F5 -1 {} {F5:FF are invalid everywhere} utf-8 F5 replace \uFFFD -1 {} {F5:FF are invalid everywhere} utf-8 F5 strict {} 0 {} {F5:FF are invalid everywhere} @@ -631,42 +671,73 @@ lappend encInvalidBytes {*}{ utf-8 E180E2F09192F1BF30 replace \uFFFD\uFFFD\uFFFD\uFFFD\x30 -1 {knownW3C} {Unicode Table 3.11} } -set xxencInvalidBytes { - - utf-8 \x41\x80\x42 tcl8 A\u0080B -1 80 - utf-8 \x41\x80\x42 replace A\uFFFDB -1 80 - utf-8 \x41\x80\x42 strict A 1 80 - utf-8 \x41\xC0\x80\x42 tcl8 A\u0000B -1 C080 - utf-8 \x41\xC0\x80\x42 strict A 1 C080 - utf-8 \x41\xC1\x42 tcl8 A\u00C1B -1 C1 - utf-8 \x41\xC1\x42 replace A\uFFFDB -1 C1 - utf-8 \x41\xC1\x42 strict A 1 C1 - utf-8 \x41\xC2\x42 tcl8 A\u00C2B -1 C2-nontrail - utf-8 \x41\xC2\x42 replace A\uFFFDB -1 C2-nontrail - utf-8 \x41\xC2\x42 strict A 1 C2-nontrail - utf-8 \x41\xC2 tcl8 A\u00C2 -1 C2-incomplete - utf-8 \x41\xC2 replace A\uFFFD -1 C2-incomplete - utf-8 \x41\xC2 strict A 1 C2-incomplete - utf-8 A\xed\xa0\x80B tcl8 A\uD800B -1 High-surrogate - utf-8 A\xed\xa0\x80B strict A 1 High-surrogate - utf-8 A\xed\xb0\x80B tcl8 A\uDC00B -1 Low-surrogate - utf-8 A\xed\xb0\x80B strict A 1 Low-surrogate - utf-8 \xed\xa0\x80\xed\xb0\x80 tcl8 \U00010000 -1 High-low-surrogate - utf-8 \xed\xa0\x80\xed\xb0\x80 strict {} 0 High-low-surrogate +# utf16-le and utf16-be test cases. Note utf16 cases are automatically generated +# based on these depending on platform endianness. Note truncated tests can only +# happen when the sequence is at the end (including by itself) Thus {solo tail} +# in some cases. +lappend encInvalidBytes {*}{ + utf-16le 41 tcl8 {} -1 {solo tail} {Truncated} + utf-16le 41 replace \uFFFD -1 {solo tail} {Truncated} + utf-16le 41 strict {} 0 {solo tail} {Truncated} + utf-16le 00D8 tcl8 \uD800 -1 {} {Missing low surrogate} + utf-16le 00D8 replace \uFFFD -1 {knownBug} {Missing low surrogate} + utf-16le 00D8 strict {} 0 {knownBug} {Missing low surrogate} + utf-16le 00DC tcl8 \uDC00 -1 {} {Missing high surrogate} + utf-16le 00DC replace \uFFFD -1 {knownBug} {Missing high surrogate} + utf-16le 00DC strict {} 0 {knownBug} {Missing high surrogate} } -set utf32-le-TODO { - utf-32le \x00\xD8\x00\x00 tcl8 \uD800 -1 {High-surrogate} - utf-32le \x00\xD8\x00\x00 strict "" 0 {High-surrogate} - utf-32le \x00\xDC\x00\x00 tcl8 \uDC00 -1 {Low-surrogate} - utf-32le \x00\xDC\x00\x00 strict "" 0 {Low-surrogate} - utf-32le \x00\xD8\x00\x00\x00\xDC\x00\x00 tcl8 \uD800\uDC00 -1 {High-low-surrogate} - utf-32le \x00\xD8\x00\x00\x00\xDC\x00\x00 strict "" 0 {High-low-surrogate} - utf-32le \x00\xDC\x00\x00\x00\xD8\x00\x00 tcl8 \uDC00\uD800 -1 {High-low-surrogate} - utf-32le \x00\xDC\x00\x00\x00\xD8\x00\x00 strict "" 0 {High-low-surrogate} - utf-32le \x41\x00\x00\x00\x00\xD8\x00\x00\x42\x00\x00\x00 tcl8 A\uD800B -1 {High-surrogate-middle} - utf-32le \x41\x00\x00\x00\x00\xD8\x00\x00\x42\x00\x00\x00 strict A 4 {High-surrogate-middle} + +# utf32-le and utf32-be test cases. Note utf32 cases are automatically generated +# based on these depending on platform endianness. Note truncated tests can only +# happen when the sequence is at the end (including by itself) Thus {solo tail} +# in some cases. +lappend encInvalidBytes {*}{ + utf-32le 41 tcl8 {} -1 {solo tail} {Truncated} + utf-32le 41 replace \uFFFD -1 {solo} {Truncated} + utf-32le 41 strict {} 0 {solo tail} {Truncated} + utf-32le 4100 tcl8 {} -1 {solo tail} {Truncated} + utf-32le 4100 replace \uFFFD -1 {solo} {Truncated} + utf-32le 4100 strict {} 0 {solo tail} {Truncated} + utf-32le 410000 tcl8 {} -1 {solo tail} {Truncated} + utf-32le 410000 replace \uFFFD -1 {solo} {Truncated} + utf-32le 410000 strict {} 0 {solo tail} {Truncated} + utf-32le 00D80000 tcl8 \uD800 -1 {} {High-surrogate} + utf-32le 00D80000 replace \uFFFD -1 {} {High-surrogate} + utf-32le 00D80000 strict {} 0 {} {High-surrogate} + utf-32le 00DC0000 tcl8 \uDC00 -1 {} {Low-surrogate} + utf-32le 00DC0000 replace \uFFFD -1 {} {Low-surrogate} + utf-32le 00DC0000 strict {} 0 {} {Low-surrogate} + utf-32le 00D8000000DC0000 tcl8 \uD800\uDC00 -1 {} {High-low-surrogate-pair} + utf-32le 00D8000000DC0000 replace \uFFFD\uFFFD -1 {} {High-low-surrogate-pair} + utf-32le 00D8000000DC0000 strict {} 0 {} {High-low-surrogate-pair} + utf-32le 00001100 tcl8 \UFFFD -1 {} {Out of range} + utf-32le 00001100 replace \UFFFD -1 {} {Out of range} + utf-32le 00001100 strict {} 0 {} {Out of range} + utf-32le FFFFFFFF tcl8 \UFFFD -1 {} {Out of range} + utf-32le FFFFFFFF replace \UFFFD -1 {} {Out of range} + utf-32le FFFFFFFF strict {} 0 {} {Out of range} + + utf-32be 41 tcl8 {} -1 {solo tail} {Truncated} + utf-32be 0041 tcl8 {} -1 {solo tail} {Truncated} + utf-32be 000041 tcl8 {} -1 {solo tail} {Truncated} + utf-32be 0000D800 tcl8 \uD800 -1 {} {High-surrogate} + utf-32be 0000D800 replace \uFFFD -1 {} {High-surrogate} + utf-32be 0000D800 strict {} 0 {} {High-surrogate} + utf-32be 0000DC00 tcl8 \uDC00 -1 {} {Low-surrogate} + utf-32be 0000DC00 replace \uFFFD -1 {} {Low-surrogate} + utf-32be 0000DC00 strict {} 0 {} {Low-surrogate} + utf-32be 0000D8000000DC00 tcl8 \uD800\uDC00 -1 {} {High-low-surrogate-pair} + utf-32be 0000D8000000DC00 replace \uFFFD\uFFFD -1 {} {High-low-surrogate-pair} + utf-32be 0000D8000000DC00 strict {} 0 {} {High-low-surrogate-pair} + utf-32be 00110000 tcl8 \UFFFD -1 {} {Out of range} + utf-32be 00110000 replace \UFFFD -1 {} {Out of range} + utf-32be 00110000 strict {} 0 {} {Out of range} + utf-32be FFFFFFFF tcl8 \UFFFD -1 {} {Out of range} + utf-32be FFFFFFFF replace \UFFFD -1 {} {Out of range} + utf-32be FFFFFFFF strict {} 0 {} {Out of range} } + # Strings that cannot be encoded for specific encoding / profiles # {encoding string profile exptedresult expectedfailindex ctrl comment} # should be unique for test ids to be unique. @@ -682,7 +753,7 @@ set utf32-le-TODO { # If the ctrl field is empty it is treated as all of the above # Note if there is any other value by itself, it will cause the test to # be skipped. This is intentional to skip known bugs. -# TODO - other encodings and test cases +# TODO - other encodings # TODO - out of range code point (note cannot be generated by \U notation) set encUnencodableStrings { ascii \u00e0 tcl8 3f -1 {} {unencodable} @@ -883,7 +954,8 @@ testconvert cmdAH-4.3.12 { } # convertfrom ?-profile? : valid byte sequences -foreach {enc str hex} $encValidStrings { +foreach {enc str hex ctrl comment} $encValidStrings { + if {"knownBug" in $ctrl} continue set bytes [binary decode hex $hex] set prefix A set suffix B @@ -899,6 +971,7 @@ foreach {enc str hex} $encValidStrings { # convertfrom ?-profile? : invalid byte sequences foreach {enc hex profile str failidx ctrl comment} $encInvalidBytes { + if {"knownBug" in $ctrl} continue set bytes [binary format H* $hex] set prefix A set suffix B @@ -945,12 +1018,13 @@ foreach {enc hex profile str failidx ctrl comment} $encInvalidBytes { } # convertfrom -failindex ?-profile? - valid data -foreach {enc str hex} $encValidStrings { +foreach {enc str hex ctrl comment} $encValidStrings { + if {"knownBug" in $ctrl} continue set bytes [binary decode hex $hex] set prefix A set suffix B - set prefix_bytes [encoding convertto $enc A] - set suffix_bytes [encoding convertto $enc B] + set prefix_bytes [encoding convertto $enc $prefix] + set suffix_bytes [encoding convertto $enc $suffix] foreach profile $encProfiles { testfailindex cmdAH-4.3.14.$hex.solo convertfrom $enc $bytes [list $str -1] $profile testfailindex cmdAH-4.3.14.$hex.lead convertfrom $enc $bytes$suffix_bytes [list $str$suffix -1] $profile @@ -961,11 +1035,14 @@ foreach {enc str hex} $encValidStrings { # convertfrom -failindex ?-profile? - invalid data foreach {enc hex profile str failidx ctrl comment} $encInvalidBytes { + if {"knownBug" in $ctrl} continue # There are multiple test cases based on location of invalid bytes set bytes [binary decode hex $hex] set prefix A set suffix B - set prefixLen [string length [encoding convertto $enc $prefix]] + set prefix_bytes [encoding convertto $enc $prefix] + set suffix_bytes [encoding convertto $enc $suffix] + set prefixLen [string length $prefix_bytes] if {$ctrl eq {} || "solo" in $ctrl} { testfailindex cmdAH-4.3.14.$hex.solo convertfrom $enc $bytes [list $str $failidx] $profile } @@ -977,7 +1054,7 @@ foreach {enc hex profile str failidx ctrl comment} $encInvalidBytes { # Failure expected set result "" } - testfailindex cmdAH-4.3.14.$hex.lead convertfrom $enc $bytes$suffix [list $result $failidx] $profile + testfailindex cmdAH-4.3.14.$hex.lead convertfrom $enc $bytes$suffix_bytes [list $result $failidx] $profile } if {$ctrl eq {} || "tail" in $ctrl} { set expected_failidx $failidx @@ -989,7 +1066,7 @@ foreach {enc hex profile str failidx ctrl comment} $encInvalidBytes { set result $prefix incr expected_failidx $prefixLen } - testfailindex cmdAH-4.3.14.$hex.tail convertfrom $enc $prefix$bytes [list $result $expected_failidx] $profile + testfailindex cmdAH-4.3.14.$hex.tail convertfrom $enc $prefix_bytes$bytes [list $result $expected_failidx] $profile } if {$ctrl eq {} || "middle" in $ctrl} { set expected_failidx $failidx @@ -1001,7 +1078,7 @@ foreach {enc hex profile str failidx ctrl comment} $encInvalidBytes { set result $prefix incr expected_failidx $prefixLen } - testfailindex cmdAH-4.3.14.$hex.middle convertfrom $enc $prefix$bytes$suffix [list $result $expected_failidx] $profile + testfailindex cmdAH-4.3.14.$hex.middle convertfrom $enc $prefix_bytes$bytes$suffix_bytes [list $result $expected_failidx] $profile } } @@ -1041,7 +1118,8 @@ testconvert cmdAH-4.4.12 { # convertto ?-profile? : valid byte sequences -foreach {enc str hex} $encValidStrings { +foreach {enc str hex ctrl comment} $encValidStrings { + if {"knownBug" in $ctrl} continue set bytes [binary decode hex $hex] set printable [printable $str] set prefix A @@ -1058,6 +1136,7 @@ foreach {enc str hex} $encValidStrings { # convertto ?-profile? : invalid byte sequences foreach {enc str profile hex failidx ctrl comment} $encUnencodableStrings { + if {"knownBug" in $ctrl} continue set bytes [binary decode hex $hex] set printable [printable $str] set prefix A @@ -1105,7 +1184,8 @@ foreach {enc str profile hex failidx ctrl comment} $encUnencodableStrings { } # convertto -failindex ?-profile? - valid data -foreach {enc str hex} $encValidStrings { +foreach {enc str hex ctrl comment} $encValidStrings { + if {"knownBug" in $ctrl} continue set bytes [binary decode hex $hex] set printable [printable $str] set prefix A @@ -1122,6 +1202,7 @@ foreach {enc str hex} $encValidStrings { # convertto -failindex ?-profile? - invalid data foreach {enc str profile hex failidx ctrl comment} $encUnencodableStrings { + if {"knownBug" in $ctrl} continue set bytes [binary decode hex $hex] set printable [printable $str] set prefix A -- cgit v0.12 From fa9ac8a850701b20b6c178fdbf30b705148ffd6b Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Mon, 20 Feb 2023 15:41:15 +0000 Subject: Fix replace profile handling of truncated surrogates --- generic/tclCmdAH.c | 9 +++++---- generic/tclEncoding.c | 42 ++++++++++++++++++++++++++++++++++++------ 2 files changed, 41 insertions(+), 10 deletions(-) diff --git a/generic/tclCmdAH.c b/generic/tclCmdAH.c index 692c75b..4dfb541 100644 --- a/generic/tclCmdAH.c +++ b/generic/tclCmdAH.c @@ -695,7 +695,8 @@ EncodingConvertfromObjCmd( } result = Tcl_ExternalToUtfDStringEx(encoding, bytesPtr, length, flags, &ds); - if (result != TCL_INDEX_NONE) { + if (result != TCL_INDEX_NONE && + TCL_ENCODING_PROFILE_GET(flags) != TCL_ENCODING_PROFILE_TCL8) { if (failVarObj != NULL) { if (Tcl_ObjSetVar2(interp, failVarObj, NULL, Tcl_NewWideIntObj(result), TCL_LEAVE_ERR_MSG) == NULL) { return TCL_ERROR; @@ -776,7 +777,8 @@ EncodingConverttoObjCmd( stringPtr = TclGetStringFromObj(data, &length); result = Tcl_UtfToExternalDStringEx(encoding, stringPtr, length, flags, &ds); - if (result != TCL_INDEX_NONE) { + if (result != TCL_INDEX_NONE && + TCL_ENCODING_PROFILE_GET(flags) != TCL_ENCODING_PROFILE_TCL8) { if (failVarObj != NULL) { /* I hope, wide int will cover size_t data type */ if (Tcl_ObjSetVar2(interp, failVarObj, NULL, Tcl_NewWideIntObj(result), TCL_LEAVE_ERR_MSG) == NULL) { @@ -795,8 +797,7 @@ EncodingConverttoObjCmd( Tcl_DStringFree(&ds); return TCL_ERROR; } - } - else if (failVarObj != NULL) { + } else if (failVarObj != NULL) { if (Tcl_ObjSetVar2(interp, failVarObj, NULL, Tcl_NewIntObj(-1), TCL_LEAVE_ERR_MSG) == NULL) { return TCL_ERROR; } diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 7e5ec22..024570a 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -2594,7 +2594,7 @@ Utf32ToUtfProc( { const char *srcStart, *srcEnd; const char *dstEnd, *dstStart; - int result, numChars, charLimit = INT_MAX; + int result, extra, numChars, charLimit = INT_MAX; int ch = 0; flags |= PTR2INT(clientData); @@ -2606,8 +2606,9 @@ Utf32ToUtfProc( /* * Check alignment with utf-32 (4 == sizeof(UTF-32)) */ - - if ((srcLen % 4) != 0) { + extra = srcLen % 4; + if (extra != 0) { + /* We have a truncated code unit */ result = TCL_CONVERT_MULTIBYTE; srcLen &= -4; } @@ -2669,13 +2670,27 @@ Utf32ToUtfProc( } else { dst += Tcl_UniCharToUtf(ch, dst); } - src += sizeof(unsigned int); + src += 4; } if ((ch & ~0x3FF) == 0xD800) { /* Bug [10c2c17c32]. If Hi surrogate, finish 3-byte UTF-8 */ dst += Tcl_UniCharToUtf(-1, dst); } + /* + * If we had a truncated code unit at the end AND this is the last + * fragment AND profile is "replace", stick FFFD in its place. + */ + if (extra && (flags & TCL_ENCODING_END) && PROFILE_REPLACE(flags)) { + src += extra; /* Go past truncated code unit */ + if (dst > dstEnd) { + result = TCL_CONVERT_NOSPACE; + } else { + dst += Tcl_UniCharToUtf(UNICODE_REPLACE_CHAR, dst); + result = TCL_OK; + } + } + *srcReadPtr = src - srcStart; *dstWrotePtr = dst - dstStart; *dstCharsPtr = numChars; @@ -2822,7 +2837,7 @@ Utf16ToUtfProc( { const char *srcStart, *srcEnd; const char *dstEnd, *dstStart; - int result, numChars, charLimit = INT_MAX; + int result, extra, numChars, charLimit = INT_MAX; unsigned short ch = 0; flags |= PTR2INT(clientData); @@ -2835,7 +2850,8 @@ Utf16ToUtfProc( * Check alignment with utf-16 (2 == sizeof(UTF-16)) */ - if ((srcLen % 2) != 0) { + extra = srcLen % 2; + if (extra != 0) { result = TCL_CONVERT_MULTIBYTE; srcLen--; } @@ -2891,6 +2907,20 @@ Utf16ToUtfProc( /* Bug [10c2c17c32]. If Hi surrogate, finish 3-byte UTF-8 */ dst += Tcl_UniCharToUtf(-1, dst); } + /* + * If we had a truncated code unit at the end AND this is the last + * fragment AND profile is "replace", stick FFFD in its place. + */ + if (extra && (flags & TCL_ENCODING_END) && PROFILE_REPLACE(flags)) { + ++src;/* Go past the truncated code unit */ + if (dst > dstEnd) { + result = TCL_CONVERT_NOSPACE; + } else { + dst += Tcl_UniCharToUtf(UNICODE_REPLACE_CHAR, dst); + result = TCL_OK; + } + } + *srcReadPtr = src - srcStart; *dstWrotePtr = dst - dstStart; *dstCharsPtr = numChars; -- cgit v0.12 From 280034d2ab7356da4aadf99bcade5d106a3da1b2 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Mon, 20 Feb 2023 20:41:00 +0000 Subject: Proposed fix for [4bea02e811]: encoding convertfrom -strict ascii \x7f generates exception --- library/encoding/ascii.enc | 2 +- library/encoding/cp1250.enc | 4 ++-- library/encoding/cp1251.enc | 2 +- library/encoding/cp1252.enc | 4 ++-- library/encoding/cp1253.enc | 4 ++-- library/encoding/cp1257.enc | 4 ++-- library/encoding/cp1258.enc | 4 ++-- library/encoding/cp864.enc | 2 +- library/encoding/cp869.enc | 4 ++-- library/encoding/cp874.enc | 4 ++-- library/encoding/cp932.enc | 2 +- library/encoding/cp949.enc | 2 +- library/encoding/cp950.enc | 4 ++-- library/encoding/tis-620.enc | 2 +- tools/encoding/Makefile | 7 +------ tools/encoding/ascii.txt | 1 + 16 files changed, 24 insertions(+), 28 deletions(-) diff --git a/library/encoding/ascii.enc b/library/encoding/ascii.enc index e0320b8..284a9f5 100644 --- a/library/encoding/ascii.enc +++ b/library/encoding/ascii.enc @@ -9,7 +9,7 @@ S 0040004100420043004400450046004700480049004A004B004C004D004E004F 0050005100520053005400550056005700580059005A005B005C005D005E005F 0060006100620063006400650066006700680069006A006B006C006D006E006F -0070007100720073007400750076007700780079007A007B007C007D007E0000 +0070007100720073007400750076007700780079007A007B007C007D007E007F 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 diff --git a/library/encoding/cp1250.enc b/library/encoding/cp1250.enc index 070ad90..f40b485 100644 --- a/library/encoding/cp1250.enc +++ b/library/encoding/cp1250.enc @@ -10,8 +10,8 @@ S 0050005100520053005400550056005700580059005A005B005C005D005E005F 0060006100620063006400650066006700680069006A006B006C006D006E006F 0070007100720073007400750076007700780079007A007B007C007D007E007F -20AC0081201A0083201E2026202020210088203001602039015A0164017D0179 -009020182019201C201D202220132014009821220161203A015B0165017E017A +20AC0000201A0000201E2026202020210000203001602039015A0164017D0179 +000020182019201C201D202220132014000021220161203A015B0165017E017A 00A002C702D8014100A4010400A600A700A800A9015E00AB00AC00AD00AE017B 00B000B102DB014200B400B500B600B700B80105015F00BB013D02DD013E017C 015400C100C2010200C40139010600C7010C00C9011800CB011A00CD00CE010E diff --git a/library/encoding/cp1251.enc b/library/encoding/cp1251.enc index 376b1b4..f9513c2 100644 --- a/library/encoding/cp1251.enc +++ b/library/encoding/cp1251.enc @@ -11,7 +11,7 @@ S 0060006100620063006400650066006700680069006A006B006C006D006E006F 0070007100720073007400750076007700780079007A007B007C007D007E007F 04020403201A0453201E20262020202120AC203004092039040A040C040B040F -045220182019201C201D202220132014009821220459203A045A045C045B045F +045220182019201C201D202220132014000021220459203A045A045C045B045F 00A0040E045E040800A4049000A600A7040100A9040400AB00AC00AD00AE0407 00B000B104060456049100B500B600B704512116045400BB0458040504550457 0410041104120413041404150416041704180419041A041B041C041D041E041F diff --git a/library/encoding/cp1252.enc b/library/encoding/cp1252.enc index dd525ea..b45a7f8 100644 --- a/library/encoding/cp1252.enc +++ b/library/encoding/cp1252.enc @@ -10,8 +10,8 @@ S 0050005100520053005400550056005700580059005A005B005C005D005E005F 0060006100620063006400650066006700680069006A006B006C006D006E006F 0070007100720073007400750076007700780079007A007B007C007D007E007F -20AC0081201A0192201E20262020202102C62030016020390152008D017D008F -009020182019201C201D20222013201402DC21220161203A0153009D017E0178 +20AC0000201A0192201E20262020202102C620300160203901520000017D0000 +000020182019201C201D20222013201402DC21220161203A01530000017E0178 00A000A100A200A300A400A500A600A700A800A900AA00AB00AC00AD00AE00AF 00B000B100B200B300B400B500B600B700B800B900BA00BB00BC00BD00BE00BF 00C000C100C200C300C400C500C600C700C800C900CA00CB00CC00CD00CE00CF diff --git a/library/encoding/cp1253.enc b/library/encoding/cp1253.enc index a8754c3..dcc8084 100644 --- a/library/encoding/cp1253.enc +++ b/library/encoding/cp1253.enc @@ -10,8 +10,8 @@ S 0050005100520053005400550056005700580059005A005B005C005D005E005F 0060006100620063006400650066006700680069006A006B006C006D006E006F 0070007100720073007400750076007700780079007A007B007C007D007E007F -20AC0081201A0192201E20262020202100882030008A2039008C008D008E008F -009020182019201C201D20222013201400982122009A203A009C009D009E009F +20AC0000201A0192201E20262020202100002030000020390000000000000000 +000020182019201C201D202220132014000021220000203A0000000000000000 00A00385038600A300A400A500A600A700A800A9000000AB00AC00AD00AE2015 00B000B100B200B3038400B500B600B703880389038A00BB038C00BD038E038F 0390039103920393039403950396039703980399039A039B039C039D039E039F diff --git a/library/encoding/cp1257.enc b/library/encoding/cp1257.enc index 4aa135d..42c6905 100644 --- a/library/encoding/cp1257.enc +++ b/library/encoding/cp1257.enc @@ -10,8 +10,8 @@ S 0050005100520053005400550056005700580059005A005B005C005D005E005F 0060006100620063006400650066006700680069006A006B006C006D006E006F 0070007100720073007400750076007700780079007A007B007C007D007E007F -20AC0081201A0083201E20262020202100882030008A2039008C00A802C700B8 -009020182019201C201D20222013201400982122009A203A009C00AF02DB009F +20AC0000201A0000201E2026202020210000203000002039000000A802C700B8 +000020182019201C201D202220132014000021220000203A000000AF02DB0000 00A0000000A200A300A4000000A600A700D800A9015600AB00AC00AD00AE00C6 00B000B100B200B300B400B500B600B700F800B9015700BB00BC00BD00BE00E6 0104012E0100010600C400C501180112010C00C90179011601220136012A013B diff --git a/library/encoding/cp1258.enc b/library/encoding/cp1258.enc index 95fdef8..bbe2b12 100644 --- a/library/encoding/cp1258.enc +++ b/library/encoding/cp1258.enc @@ -10,8 +10,8 @@ S 0050005100520053005400550056005700580059005A005B005C005D005E005F 0060006100620063006400650066006700680069006A006B006C006D006E006F 0070007100720073007400750076007700780079007A007B007C007D007E007F -20AC0081201A0192201E20262020202102C62030008A20390152008D008E008F -009020182019201C201D20222013201402DC2122009A203A0153009D009E0178 +20AC0000201A0192201E20262020202102C62030000020390152000000000000 +000020182019201C201D20222013201402DC21220000203A0153000000000178 00A000A100A200A300A400A500A600A700A800A900AA00AB00AC00AD00AE00AF 00B000B100B200B300B400B500B600B700B800B900BA00BB00BC00BD00BE00BF 00C000C100C2010200C400C500C600C700C800C900CA00CB030000CD00CE00CF diff --git a/library/encoding/cp864.enc b/library/encoding/cp864.enc index 71f9e62..dad7c20 100644 --- a/library/encoding/cp864.enc +++ b/library/encoding/cp864.enc @@ -11,7 +11,7 @@ S 0060006100620063006400650066006700680069006A006B006C006D006E006F 0070007100720073007400750076007700780079007A007B007C007D007E007F 00B000B72219221A259225002502253C2524252C251C25342510250C25142518 -03B2221E03C600B100BD00BC224800AB00BBFEF7FEF8009B009CFEFBFEFC009F +03B2221E03C600B100BD00BC224800AB00BBFEF7FEF800000000FEFBFEFC0000 00A000ADFE8200A300A4FE8400000000FE8EFE8FFE95FE99060CFE9DFEA1FEA5 0660066106620663066406650666066706680669FED1061BFEB1FEB5FEB9061F 00A2FE80FE81FE83FE85FECAFE8BFE8DFE91FE93FE97FE9BFE9FFEA3FEA7FEA9 diff --git a/library/encoding/cp869.enc b/library/encoding/cp869.enc index 9fd2929..4670826 100644 --- a/library/encoding/cp869.enc +++ b/library/encoding/cp869.enc @@ -10,8 +10,8 @@ S 0050005100520053005400550056005700580059005A005B005C005D005E005F 0060006100620063006400650066006700680069006A006B006C006D006E006F 0070007100720073007400750076007700780079007A007B007C007D007E007F -0080008100820083008400850386008700B700AC00A620182019038820150389 -038A03AA038C00930094038E03AB00A9038F00B200B303AC00A303AD03AE03AF +0000000000000000000000000386000000B700AC00A620182019038820150389 +038A03AA038C00000000038E03AB00A9038F00B200B303AC00A303AD03AE03AF 03CA039003CC03CD039103920393039403950396039700BD0398039900AB00BB 25912592259325022524039A039B039C039D256325512557255D039E039F2510 25142534252C251C2500253C03A003A1255A25542569256625602550256C03A3 diff --git a/library/encoding/cp874.enc b/library/encoding/cp874.enc index 0487b97..e2e8433 100644 --- a/library/encoding/cp874.enc +++ b/library/encoding/cp874.enc @@ -10,8 +10,8 @@ S 0050005100520053005400550056005700580059005A005B005C005D005E005F 0060006100620063006400650066006700680069006A006B006C006D006E006F 0070007100720073007400750076007700780079007A007B007C007D007E007F -20AC008100820083008420260086008700880089008A008B008C008D008E008F -009020182019201C201D20222013201400980099009A009B009C009D009E009F +20AC000000000000000020260000000000000000000000000000000000000000 +000020182019201C201D20222013201400000000000000000000000000000000 00A00E010E020E030E040E050E060E070E080E090E0A0E0B0E0C0E0D0E0E0E0F 0E100E110E120E130E140E150E160E170E180E190E1A0E1B0E1C0E1D0E1E0E1F 0E200E210E220E230E240E250E260E270E280E290E2A0E2B0E2C0E2D0E2E0E2F diff --git a/library/encoding/cp932.enc b/library/encoding/cp932.enc index 8da8cd6..0699000 100644 --- a/library/encoding/cp932.enc +++ b/library/encoding/cp932.enc @@ -10,7 +10,7 @@ M 0050005100520053005400550056005700580059005A005B005C005D005E005F 0060006100620063006400650066006700680069006A006B006C006D006E006F 0070007100720073007400750076007700780079007A007B007C007D007E007F -0080000000000000000000850086000000000000000000000000000000000000 +0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000FF61FF62FF63FF64FF65FF66FF67FF68FF69FF6AFF6BFF6CFF6DFF6EFF6F FF70FF71FF72FF73FF74FF75FF76FF77FF78FF79FF7AFF7BFF7CFF7DFF7EFF7F diff --git a/library/encoding/cp949.enc b/library/encoding/cp949.enc index 2f3ec39..459dbd9 100644 --- a/library/encoding/cp949.enc +++ b/library/encoding/cp949.enc @@ -10,7 +10,7 @@ M 0050005100520053005400550056005700580059005A005B005C005D005E005F 0060006100620063006400650066006700680069006A006B006C006D006E006F 0070007100720073007400750076007700780079007A007B007C007D007E007F -0080000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 diff --git a/library/encoding/cp950.enc b/library/encoding/cp950.enc index f33d785..f582bd9 100644 --- a/library/encoding/cp950.enc +++ b/library/encoding/cp950.enc @@ -10,8 +10,8 @@ M 0050005100520053005400550056005700580059005A005B005C005D005E005F 0060006100620063006400650066006700680069006A006B006C006D006E006F 0070007100720073007400750076007700780079007A007B007C007D007E007F -0080008100820083008400850086008700880089008A008B008C008D008E008F -0090009100920093009400950096009700980099009A009B009C009D009E009F +0000000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 diff --git a/library/encoding/tis-620.enc b/library/encoding/tis-620.enc index 2e9142a..af77326 100644 --- a/library/encoding/tis-620.enc +++ b/library/encoding/tis-620.enc @@ -9,7 +9,7 @@ S 0040004100420043004400450046004700480049004A004B004C004D004E004F 0050005100520053005400550056005700580059005A005B005C005D005E005F 0060006100620063006400650066006700680069006A006B006C006D006E006F -0070007100720073007400750076007700780079007A007B007C007D007E0000 +0070007100720073007400750076007700780079007A007B007C007D007E007F 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 00000E010E020E030E040E050E060E070E080E090E0A0E0B0E0C0E0D0E0E0E0F diff --git a/tools/encoding/Makefile b/tools/encoding/Makefile index 361239e..7235b47 100644 --- a/tools/encoding/Makefile +++ b/tools/encoding/Makefile @@ -67,15 +67,10 @@ encodings: clean txt2enc $(EUC_ENCODINGS) @for p in *.txt; do \ enc=`echo $$p | sed 's/\..*$$/\.enc/'`; \ echo $$enc; \ - ./txt2enc -e 0 -u 1 $$p > $$enc; \ + ./txt2enc -m $$p > $$enc; \ done @echo @echo Compiling special versions of encoding files. - @for p in ascii.txt; do \ - enc=`echo $$p | sed 's/\..*$$/\.enc/'`; \ - echo $$enc; \ - ./txt2enc -m $$p > $$enc; \ - done @for p in jis0208.txt; do \ enc=`echo $$p | sed 's/\..*$$/\.enc/'`; \ echo $$enc; \ diff --git a/tools/encoding/ascii.txt b/tools/encoding/ascii.txt index 66ba6f3..2afbaab 100644 --- a/tools/encoding/ascii.txt +++ b/tools/encoding/ascii.txt @@ -93,3 +93,4 @@ 0x7C 0x007C # VERTICAL LINE 0x7D 0x007D # RIGHT CURLY BRACKET 0x7E 0x007E # TILDE +0x7F 0x007F # DELETE \ No newline at end of file -- cgit v0.12 From 8660fd1af23543a70d94adaec5d7b98105ad3ca3 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Mon, 20 Feb 2023 20:52:06 +0000 Subject: Two more files, re-generated --- library/encoding/cp1254.enc | 4 ++-- library/encoding/cp1255.enc | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/library/encoding/cp1254.enc b/library/encoding/cp1254.enc index b9e3b3c..4922f3c 100644 --- a/library/encoding/cp1254.enc +++ b/library/encoding/cp1254.enc @@ -10,8 +10,8 @@ S 0050005100520053005400550056005700580059005A005B005C005D005E005F 0060006100620063006400650066006700680069006A006B006C006D006E006F 0070007100720073007400750076007700780079007A007B007C007D007E007F -20AC0081201A0192201E20262020202102C62030016020390152008D008E008F -009020182019201C201D20222013201402DC21220161203A0153009D009E0178 +20AC0000201A0192201E20262020202102C62030016020390152000000000000 +000020182019201C201D20222013201402DC21220161203A0153000000000178 00A000A100A200A300A400A500A600A700A800A900AA00AB00AC00AD00AE00AF 00B000B100B200B300B400B500B600B700B800B900BA00BB00BC00BD00BE00BF 00C000C100C200C300C400C500C600C700C800C900CA00CB00CC00CD00CE00CF diff --git a/library/encoding/cp1255.enc b/library/encoding/cp1255.enc index 6e78b95..74ef0c1 100644 --- a/library/encoding/cp1255.enc +++ b/library/encoding/cp1255.enc @@ -10,8 +10,8 @@ S 0050005100520053005400550056005700580059005A005B005C005D005E005F 0060006100620063006400650066006700680069006A006B006C006D006E006F 0070007100720073007400750076007700780079007A007B007C007D007E007F -20AC0081201A0192201E20262020202102C62030008A2039008C008D008E008F -009020182019201C201D20222013201402DC2122009A203A009C009D009E009F +20AC0000201A0192201E20262020202102C62030000020390000000000000000 +000020182019201C201D20222013201402DC21220000203A0000000000000000 00A000A100A200A320AA00A500A600A700A800A900D700AB00AC00AD00AE00AF 00B000B100B200B300B400B500B600B700B800B900F700BB00BC00BD00BE00BF 05B005B105B205B305B405B505B605B705B805B9000005BB05BC05BD05BE05BF -- cgit v0.12 From 3315012c955111ef840365ecd7cc4ff46a15e204 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Mon, 20 Feb 2023 21:43:29 +0000 Subject: re-generated 8 more encodings --- library/encoding/big5.enc | 4 ++-- library/encoding/euc-cn.enc | 4 ++-- library/encoding/euc-jp.enc | 4 ++-- library/encoding/euc-kr.enc | 4 ++-- library/encoding/gb1988.enc | 4 ++-- library/encoding/jis0201.enc | 4 ++-- library/encoding/macJapan.enc | 2 +- library/encoding/shiftjis.enc | 2 +- tools/encoding/big5.txt | 1 + 9 files changed, 15 insertions(+), 14 deletions(-) diff --git a/library/encoding/big5.enc b/library/encoding/big5.enc index 26179f4..d6ff760 100644 --- a/library/encoding/big5.enc +++ b/library/encoding/big5.enc @@ -10,8 +10,8 @@ M 0050005100520053005400550056005700580059005A005B005C005D005E005F 0060006100620063006400650066006700680069006A006B006C006D006E006F 0070007100720073007400750076007700780079007A007B007C007D007E007F -0080008100820083008400850086008700880089008A008B008C008D008E008F -0090009100920093009400950096009700980099009A009B009C009D009E009F +0000000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 diff --git a/library/encoding/euc-cn.enc b/library/encoding/euc-cn.enc index 4b2f8c7..ff0f984 100644 --- a/library/encoding/euc-cn.enc +++ b/library/encoding/euc-cn.enc @@ -10,8 +10,8 @@ M 0050005100520053005400550056005700580059005A005B005C005D005E005F 0060006100620063006400650066006700680069006A006B006C006D006E006F 0070007100720073007400750076007700780079007A007B007C007D007E007F -0080008100820083008400850086008700880089008A008B008C008D008E008F -0090009100920093009400950096009700980099009A009B009C009D009E009F +0000000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 diff --git a/library/encoding/euc-jp.enc b/library/encoding/euc-jp.enc index db56c88..d4337d9 100644 --- a/library/encoding/euc-jp.enc +++ b/library/encoding/euc-jp.enc @@ -10,8 +10,8 @@ M 0050005100520053005400550056005700580059005A005B005C005D005E005F 0060006100620063006400650066006700680069006A006B006C006D006E006F 0070007100720073007400750076007700780079007A007B007C007D007E007F -0080008100820083008400850086008700880089008A008B008C008D0000008F -0090009100920093009400950096009700980099009A009B009C009D009E009F +0000000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 diff --git a/library/encoding/euc-kr.enc b/library/encoding/euc-kr.enc index 5e9bb93..0433260 100644 --- a/library/encoding/euc-kr.enc +++ b/library/encoding/euc-kr.enc @@ -10,8 +10,8 @@ M 0050005100520053005400550056005700580059005A005B005C005D005E005F 0060006100620063006400650066006700680069006A006B006C006D006E006F 0070007100720073007400750076007700780079007A007B007C007D007E007F -0080008100820083008400850086008700880089008A008B008C008D008E008F -0090009100920093009400950096009700980099009A009B009C009D009E009F +0000000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 diff --git a/library/encoding/gb1988.enc b/library/encoding/gb1988.enc index 298732c..8254684 100644 --- a/library/encoding/gb1988.enc +++ b/library/encoding/gb1988.enc @@ -10,8 +10,8 @@ S 0050005100520053005400550056005700580059005A005B005C005D005E005F 0060006100620063006400650066006700680069006A006B006C006D006E006F 0070007100720073007400750076007700780079007A007B007C007D203E007F -0080008100820083008400850086008700880089008A008B008C008D008E008F -0090009100920093009400950096009700980099009A009B009C009D009E009F +0000000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000000000000000000000000000000000 0000FF61FF62FF63FF64FF65FF66FF67FF68FF69FF6AFF6BFF6CFF6DFF6EFF6F FF70FF71FF72FF73FF74FF75FF76FF77FF78FF79FF7AFF7BFF7CFF7DFF7EFF7F FF80FF81FF82FF83FF84FF85FF86FF87FF88FF89FF8AFF8BFF8CFF8DFF8EFF8F diff --git a/library/encoding/jis0201.enc b/library/encoding/jis0201.enc index 64f423f..70e099d 100644 --- a/library/encoding/jis0201.enc +++ b/library/encoding/jis0201.enc @@ -10,8 +10,8 @@ S 0050005100520053005400550056005700580059005A005B005C005D005E005F 0060006100620063006400650066006700680069006A006B006C006D006E006F 0070007100720073007400750076007700780079007A007B007C007D203E007F -0080008100820083008400850086008700880089008A008B008C008D008E008F -0090009100920093009400950096009700980099009A009B009C009D009E009F +0000000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000000000000000000000000000000000 0000FF61FF62FF63FF64FF65FF66FF67FF68FF69FF6AFF6BFF6CFF6DFF6EFF6F FF70FF71FF72FF73FF74FF75FF76FF77FF78FF79FF7AFF7BFF7CFF7DFF7EFF7F FF80FF81FF82FF83FF84FF85FF86FF87FF88FF89FF8AFF8BFF8CFF8DFF8EFF8F diff --git a/library/encoding/macJapan.enc b/library/encoding/macJapan.enc index dba24bd..9f3f03b 100644 --- a/library/encoding/macJapan.enc +++ b/library/encoding/macJapan.enc @@ -10,7 +10,7 @@ M 0050005100520053005400550056005700580059005A005B005C005D005E005F 0060006100620063006400650066006700680069006A006B006C006D006E006F 0070007100720073007400750076007700780079007A007B007C007D007E007F -0080000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 00A0FF61FF62FF63FF64FF65FF66FF67FF68FF69FF6AFF6BFF6CFF6DFF6EFF6F FF70FF71FF72FF73FF74FF75FF76FF77FF78FF79FF7AFF7BFF7CFF7DFF7EFF7F diff --git a/library/encoding/shiftjis.enc b/library/encoding/shiftjis.enc index 140aec4..3ba972e 100644 --- a/library/encoding/shiftjis.enc +++ b/library/encoding/shiftjis.enc @@ -10,7 +10,7 @@ M 0050005100520053005400550056005700580059005A005B005C005D005E005F 0060006100620063006400650066006700680069006A006B006C006D006E006F 0070007100720073007400750076007700780079007A007B007C007D007E007F -0080000000000000000000850086008700000000000000000000000000000000 +0000000000000000000000000000000000000000000000000000000000000000 0000000000000000000000000000000000000000000000000000000000000000 0000FF61FF62FF63FF64FF65FF66FF67FF68FF69FF6AFF6BFF6CFF6DFF6EFF6F FF70FF71FF72FF73FF74FF75FF76FF77FF78FF79FF7AFF7BFF7CFF7DFF7EFF7F diff --git a/tools/encoding/big5.txt b/tools/encoding/big5.txt index 58cdfe2..06b0fac 100644 --- a/tools/encoding/big5.txt +++ b/tools/encoding/big5.txt @@ -185,6 +185,7 @@ 0x7C 0x007C # VERTICAL LINE 0x7D 0x007D # RIGHT CURLY BRACKET 0x7E 0x007E # TILDE +0x7F 0x007F # DELETE 0xA140 0x3000 # IDEOGRAPHIC SPACE 0xA141 0xFF0C # FULLWIDTH COMMA 0xA142 0x3001 # IDEOGRAPHIC COMMA -- cgit v0.12 From 4d644dfb73457eb3615b30550dd31d1b48bfa7d4 Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Tue, 21 Feb 2023 16:03:18 +0000 Subject: Generate test data from ICU UCM data files. SBCS only for now --- tools/ucm2tests.tcl | 185 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 185 insertions(+) create mode 100644 tools/ucm2tests.tcl diff --git a/tools/ucm2tests.tcl b/tools/ucm2tests.tcl new file mode 100644 index 0000000..22ae529 --- /dev/null +++ b/tools/ucm2tests.tcl @@ -0,0 +1,185 @@ +# ucm2tests.tcl +# +# Parses given ucm files (from ICU) to generate test data +# for encodings. The generated scripts are written to stdout. +# +# tclsh ucmtotests.tcl PATH_TO_ICU_UCM_DIRECTORY +# + +namespace eval ucm { + # No means to change these currently but ... + variable outputChan stdout + variable errorChan stderr + variable verbose 0 + + # Map Tcl encoding name to ICU UCM file name + variable encNameMap + array set encNameMap { + cp1250 glibc-CP1250-2.1.2 + cp1251 glibc-CP1251-2.1.2 + cp1252 glibc-CP1252-2.1.2 + cp1253 glibc-CP1253-2.1.2 + cp1254 glibc-CP1254-2.1.2 + cp1255 glibc-CP1255-2.1.2 + cp1256 glibc-CP1256-2.1.2 + cp1257 glibc-CP1257-2.1.2 + cp1258 glibc-CP1258-2.1.2 + iso8859-1 glibc-ISO_8859_1-2.1.2 + iso8859-2 glibc-ISO_8859_2-2.1.2 + iso8859-3 glibc-ISO_8859_3-2.1.2 + iso8859-4 glibc-ISO_8859_4-2.1.2 + iso8859-5 glibc-ISO_8859_5-2.1.2 + iso8859-6 glibc-ISO_8859_6-2.1.2 + iso8859-7 glibc-ISO_8859_7-2.3.3 + iso8859-8 glibc-ISO_8859_8-2.3.3 + iso8859-9 glibc-ISO_8859_9-2.1.2 + iso8859-10 glibc-ISO_8859_10-2.1.2 + iso8859-11 glibc-ISO_8859_11-2.1.2 + iso8859-13 glibc-ISO_8859_13-2.1.2 + iso8859-14 glibc-ISO_8859_14-2.1.2 + iso8859-15 glibc-ISO_8859_15-2.1.2 + iso8859-16 glibc-ISO_8859_16-2.3.3 + } + + # Dictionary Character map for Tcl encoding + variable charMap +} + +proc ucm::abort {msg} { + variable errorChan + puts $errorChan $msg + exit 1 +} +proc ucm::warn {msg} { + variable errorChan + puts $errorChan $msg +} +proc ucm::log {msg} { + variable verbose + if {$verbose} { + variable errorChan + puts $errorChan $msg + } +} +proc ucm::print {s} { + variable outputChan + puts $outputChan $s +} + +proc ucm::parse_SBCS {fd} { + set result {} + while {[gets $fd line] >= 0} { + if {[string match #* $line]} { + continue + } + if {[string equal "END CHARMAP" [string trim $line]]} { + break + } + if {![regexp {^\s*\s*((\\x[[:xdigit:]]{2})+)\s*(\|(0|1|2|3|4))} $line -> unichar bytes - - precision]} { + error "Unexpected line parsing SBCS: $line" + } + set bytes [string map {\\x {}} $bytes]; # \xNN -> NN + if {$precision eq "" || $precision eq "0"} { + lappend result $unichar $bytes + } else { + # It is a fallback mapping - ignore + } + } + return $result +} + +proc ucm::generate_tests {} { + variable encNameMap + variable charMap + + array set tclNames {} + foreach encName [encoding names] { + set tclNames($encName) "" + } + foreach encName [lsort [array names encNameMap]] { + if {![info exists charMap($encName)]} { + warn "No character map read for $encName" + continue + } + unset tclNames($encName) + print "\n# $encName (generated from $encNameMap($encName))" + print "lappend encValidStrings {*}{" + foreach {unich hex} $charMap($encName) { + print " $encName \\u$unich $hex {} {}" + } + print "}; # $encName" + } + if {[array size tclNames]} { + warn "Missing encoding: [lsort [array names tclNames]]" + } +} + +proc ucm::parse_file {encName ucmPath} { + variable charMap + set fd [open $ucmPath] + try { + # Parse the metadata + unset -nocomplain state + while {[gets $fd line] >= 0} { + if {[regexp {<(code_set_name|mb_cur_max|mb_cur_min|uconv_class|subchar)>\s+(\S+)} $line -> key val]} { + set state($key) $val + } elseif {[regexp {^\s*CHARMAP\s*$} $line]} { + set state(charmap) "" + break + } else { + # Skip all else + } + } + if {![info exists state(charmap)]} { + abort "Error: $path has No CHARMAP line." + } + foreach key {code_set_name uconv_class} { + if {[info exists state($key)]} { + set state($key) [string trim $state($key) {"}] + } + } + if {[info exists charMap($encName)]} { + abort "Duplicate file for $encName ($path)" + } + if {![info exists state(uconv_class)]} { + abort "Error: $path has no uconv_class definition." + } + switch -exact -- $state(uconv_class) { + SBCS { + if {[catch { + set charMap($encName) [parse_SBCS $fd] + } result]} { + abort "Could not process $path. $result" + } + } + default { + log "Skipping $path -- not SBCS encoding." + return + } + } + } finally { + close $fd + } +} + +proc ucm::expand_paths {patterns} { + set expanded {} + foreach pat $patterns { + # The file join is for \ -> / + lappend expanded {*}[glob -nocomplain [file join $pat]] + } + return $expanded +} + +proc ucm::run {} { + variable encNameMap + if {[llength $::argv] != 1} { + abort "Usage: [info nameofexecutable] $::argv0 PATHTOUCMFILES" + } + foreach {encName fname} [array get encNameMap] { + ucm::parse_file $encName [file join [lindex $::argv 0] ${fname}.ucm] + } + generate_tests +} + +ucm::run -- cgit v0.12 From 417f86869302e367a498d61a8bed0aa755746517 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Tue, 21 Feb 2023 16:09:18 +0000 Subject: Proposed fix for [534172ff5b]: Crash in DeleteReflectedChannelMap (introduced via tcllib 1.21) --- generic/tclIORChan.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/generic/tclIORChan.c b/generic/tclIORChan.c index 7ea50c8..8c6f25f 100644 --- a/generic/tclIORChan.c +++ b/generic/tclIORChan.c @@ -1128,8 +1128,8 @@ ReflectClose( /* * This call comes from TclFinalizeIOSystem. There are no * interpreters, and therefore we cannot call upon the handler command - * anymore. Threading is irrelevant as well. We simply clean up all - * our C level data structures and leave the Tcl level to the other + * anymore. Threading is irrelevant as well. Simply clean up all + * the C level data structures and leave the Tcl level to the other * finalization functions. */ @@ -2697,6 +2697,7 @@ DeleteThreadReflectedChannelMap( Tcl_ThreadId self = Tcl_GetCurrentThread(); ReflectedChannelMap *rcmPtr; /* The map */ ForwardingResult *resultPtr; + ThreadSpecificData *tsdPtr = TCL_TSD_INIT(&dataKey); (void)dummy; /* @@ -2777,6 +2778,7 @@ DeleteThreadReflectedChannelMap( */ rcmPtr = GetThreadReflectedChannelMap(); + tsdPtr->rcmPtr = NULL; for (hPtr = Tcl_FirstHashEntry(&rcmPtr->map, &hSearch); hPtr != NULL; hPtr = Tcl_FirstHashEntry(&rcmPtr->map, &hSearch)) { @@ -3083,10 +3085,10 @@ ForwardProc( (paramPtr->seek.seekMode==SEEK_SET) ? "start" : (paramPtr->seek.seekMode==SEEK_CUR) ? "current" : "end", -1); - Tcl_IncrRefCount(offObj); - Tcl_IncrRefCount(baseObj); + Tcl_IncrRefCount(offObj); + Tcl_IncrRefCount(baseObj); - Tcl_Preserve(rcPtr); + Tcl_Preserve(rcPtr); if (InvokeTclMethod(rcPtr, METH_SEEK, offObj, baseObj, &resObj)!=TCL_OK){ ForwardSetObjError(paramPtr, resObj); paramPtr->seek.offset = -1; -- cgit v0.12 From 42956f6f09023c19e2c057150f6584f0f1f40b4c Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Tue, 21 Feb 2023 16:58:13 +0000 Subject: Some test-cases start failing in 9.0-compatibility-mode (-DTCL_NO_DEPRECATED), if the system encoding is one with gaps. So, better use iso8859-1 for those testcases --- tests/chanio.test | 12 ++++++------ tests/io.test | 12 ++++++------ 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/tests/chanio.test b/tests/chanio.test index fb94051..61c168f 100644 --- a/tests/chanio.test +++ b/tests/chanio.test @@ -6736,8 +6736,8 @@ test chan-io-52.5 {TclCopyChannel, all} -constraints {fcopy} -setup { } -body { set f1 [open $thisScript] set f2 [open $path(test1) w] - chan configure $f1 -translation lf -blocking 0 - chan configure $f2 -translation lf -blocking 0 + chan configure $f1 -translation lf -encoding iso8859-1 -blocking 0 + chan configure $f2 -translation lf -encoding iso8859-1 -blocking 0 chan copy $f1 $f2 -size -1 ;# -1 means 'copy all', same as if no -size specified. set result [list [chan configure $f1 -blocking] [chan configure $f2 -blocking]] chan close $f1 @@ -6752,8 +6752,8 @@ test chan-io-52.5a {TclCopyChannel, all, other negative value} -setup { } -constraints {fcopy} -body { set f1 [open $thisScript] set f2 [open $path(test1) w] - chan configure $f1 -translation lf -blocking 0 - chan configure $f2 -translation lf -blocking 0 + chan configure $f1 -translation lf -encoding iso8859-1 -blocking 0 + chan configure $f2 -translation lf -encoding iso8859-1 -blocking 0 chan copy $f1 $f2 -size -2 ;# < 0 behaves like -1, copy all set result [list [chan configure $f1 -blocking] [chan configure $f2 -blocking]] chan close $f1 @@ -6768,8 +6768,8 @@ test chan-io-52.5b {TclCopyChannel, all, wrap to negative value} -setup { } -constraints {fcopy} -body { set f1 [open $thisScript] set f2 [open $path(test1) w] - chan configure $f1 -translation lf -blocking 0 - chan configure $f2 -translation lf -blocking 0 + chan configure $f1 -translation lf -encoding iso8859-1 -blocking 0 + chan configure $f2 -translation lf -encoding iso8859-1 -blocking 0 chan copy $f1 $f2 -size 3221176172 ;# Wrapped to < 0, behaves like -1, copy all set result [list [chan configure $f1 -blocking] [chan configure $f2 -blocking]] chan close $f1 diff --git a/tests/io.test b/tests/io.test index 7b8182e..aed7f85 100644 --- a/tests/io.test +++ b/tests/io.test @@ -7280,8 +7280,8 @@ test io-52.5 {TclCopyChannel, all} {fcopy} { file delete $path(test1) set f1 [open $thisScript] set f2 [open $path(test1) w] - fconfigure $f1 -translation lf -blocking 0 - fconfigure $f2 -translation lf -blocking 0 + fconfigure $f1 -translation lf -encoding iso8859-1 -blocking 0 + fconfigure $f2 -translation lf -encoding iso8859-1 -blocking 0 fcopy $f1 $f2 -size -1 ;# -1 means 'copy all', same as if no -size specified. set result [list [fconfigure $f1 -blocking] [fconfigure $f2 -blocking]] close $f1 @@ -7297,8 +7297,8 @@ test io-52.5a {TclCopyChannel, all, other negative value} {fcopy} { file delete $path(test1) set f1 [open $thisScript] set f2 [open $path(test1) w] - fconfigure $f1 -translation lf -blocking 0 - fconfigure $f2 -translation lf -blocking 0 + fconfigure $f1 -translation lf -encoding iso8859-1 -blocking 0 + fconfigure $f2 -translation lf -encoding iso8859-1 -blocking 0 fcopy $f1 $f2 -size -2 ;# < 0 behaves like -1, copy all set result [list [fconfigure $f1 -blocking] [fconfigure $f2 -blocking]] close $f1 @@ -7314,8 +7314,8 @@ test io-52.5b {TclCopyChannel, all, wrap to negative value} {fcopy} { file delete $path(test1) set f1 [open $thisScript] set f2 [open $path(test1) w] - fconfigure $f1 -translation lf -blocking 0 - fconfigure $f2 -translation lf -blocking 0 + fconfigure $f1 -translation lf -encoding iso8859-1 -blocking 0 + fconfigure $f2 -translation lf -encoding iso8859-1 -blocking 0 fcopy $f1 $f2 -size 3221176172 ;# Wrapped to < 0, behaves like -1, copy all set result [list [fconfigure $f1 -blocking] [fconfigure $f2 -blocking]] close $f1 -- cgit v0.12 From 9b8fa27457c97577817b8f86b0b658a04867d7c7 Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Tue, 21 Feb 2023 17:27:16 +0000 Subject: Rework ICU tests to check validity of whole charmap in one test, else too many tests. --- tests/cmdAH.test | 87 +++++++++++++++++++++++++++----------------- tools/ucm2tests.tcl | 101 ++++++++++++++++++++++++++++++++++++++++++++++------ 2 files changed, 144 insertions(+), 44 deletions(-) diff --git a/tests/cmdAH.test b/tests/cmdAH.test index 1fbe6d2..3be2f14 100644 --- a/tests/cmdAH.test +++ b/tests/cmdAH.test @@ -186,9 +186,11 @@ set encDefaultProfile tcl8; # Should reflect the default from implementation # TODO - valid sequences for different encodings - shiftjis etc. # Note utf-16, utf-32 missing because they are automatically # generated based on le/be versions. -set encValidStrings { +lappend encValidStrings {*}{ ascii \u0000 00 {} {Lowest ASCII} ascii \u007F 7F knownBug {Highest ASCII} + ascii \u007D 7D {} {Brace - just to verify test scripts are escaped correctly} + ascii \u007B 7B {} {Terminating brace - just to verify test scripts are escaped correctly} utf-8 \u0000 00 {} {Unicode Table 3.7 Row 1} utf-8 \u007F 7F {} {Unicode Table 3.7 Row 1} @@ -361,9 +363,28 @@ lappend encInvalidBytes {*}{ utf-8 C080 tcl8 \u0000 -1 {} {C080 -> U+0 in Tcl's internal modified UTF8} utf-8 C080 strict {} 0 {} {C080 -> invalid} utf-8 C080 replace \uFFFD -1 {} {C080 -> single replacement char} + utf-8 C0A2 tcl8 \u00C0\u00A2 -1 {} {websec.github.io - A} + utf-8 C0A2 replace \uFFFD\uFFFD -1 {} {websec.github.io - A} + utf-8 C0A2 strict {} 0 {} {websec.github.io - A} + utf-8 C0A7 tcl8 \u00C0\u00A7 -1 {} {websec.github.io - double quote} + utf-8 C0A7 replace \uFFFD\uFFFD -1 {} {websec.github.io - double quote} + utf-8 C0A7 strict {} 0 {} {websec.github.io - double quote} + utf-8 C0AE tcl8 \u00C0\u00AE -1 {} {websec.github.io - full stop} + utf-8 C0AE replace \uFFFD\uFFFD -1 {} {websec.github.io - full stop} + utf-8 C0AE strict {} 0 {} {websec.github.io - full stop} + utf-8 C0AF tcl8 \u00C0\u00AF -1 {} {websec.github.io - solidus} + utf-8 C0AF replace \uFFFD\uFFFD -1 {} {websec.github.io - solidus} + utf-8 C0AF strict {} 0 {} {websec.github.io - solidus} + utf-8 C1 tcl8 \u00C1 -1 {} {C1 is invalid everywhere} utf-8 C1 replace \uFFFD -1 {} {C1 is invalid everywhere} utf-8 C1 strict {} 0 {} {C1 is invalid everywhere} + utf-8 C181 tcl8 \u00C1\u0081 -1 {} {websec.github.io - base test (A)} + utf-8 C181 replace \uFFFD\uFFFD -1 {} {websec.github.io - base test (A)} + utf-8 C181 strict {} 0 {} {websec.github.io - base test (A)} + utf-8 C19C tcl8 \u00C1\u0153 -1 {} {websec.github.io - reverse solidus} + utf-8 C19C replace \uFFFD\uFFFD -1 {} {websec.github.io - reverse solidus} + utf-8 C19C strict {} 0 {} {websec.github.io - reverse solidus} utf-8 C2 tcl8 \u00C2 -1 {} {Missing trail byte} utf-8 C2 replace \uFFFD -1 {} {Missing trail byte} @@ -387,6 +408,9 @@ lappend encInvalidBytes {*}{ utf-8 E080 tcl8 \u00E0\u20AC -1 {} {First trail byte must be A0:BF} utf-8 E080 replace \uFFFD\uFFFD -1 {} {First trail byte must be A0:BF} utf-8 E080 strict {} 0 {} {First trail byte must be A0:BF} + utf-8 E0819C tcl8 \u00E0\u0081\u0153 -1 {} {websec.github.io - reverse solidus} + utf-8 E0819C replace \uFFFD\uFFFD\uFFFD -1 {} {websec.github.io - reverse solidus} + utf-8 E0819C strict {} 0 {} {websec.github.io - reverse solidus} utf-8 E09F tcl8 \u00E0\u0178 -1 {} {First trail byte must be A0:BF} utf-8 E09F replace \uFFFD\uFFFD -1 {} {First trail byte must be A0:BF} utf-8 E09F strict {} 0 {} {First trail byte must be A0:BF} @@ -526,6 +550,9 @@ lappend encInvalidBytes {*}{ utf-8 F0 tcl8 \u00F0 -1 {} {Missing trail byte} utf-8 F0 replace \uFFFD -1 {} {Missing trail byte} utf-8 F0 strict {} 0 {} {Missing trail byte} + utf-8 F080 tcl8 \u00F0\u20AC -1 {} {First trail byte must be 90:BF} + utf-8 F080 replace \uFFFD -1 {knownW3C} {First trail byte must be 90:BF} + utf-8 F080 strict {} 0 {} {First trail byte must be 90:BF} utf-8 F08F tcl8 \u00F0\u8F -1 {} {First trail byte must be 90:BF} utf-8 F08F replace \uFFFD -1 {knownW3C} {First trail byte must be 90:BF} utf-8 F08F strict {} 0 {} {First trail byte must be 90:BF} @@ -755,7 +782,7 @@ lappend encInvalidBytes {*}{ # be skipped. This is intentional to skip known bugs. # TODO - other encodings # TODO - out of range code point (note cannot be generated by \U notation) -set encUnencodableStrings { +lappend encUnencodableStrings {*}{ ascii \u00e0 tcl8 3f -1 {} {unencodable} ascii \u00e0 strict {} 0 {} {unencodable} @@ -768,12 +795,6 @@ set encUnencodableStrings { utf-8 \uDC00 strict {} 0 {} High-surrogate } -if {$::tcl_platform(byteOrder) eq "littleEndian"} { - set endian le -} else { - set endian be -} - # Maps utf-{16,32}{le,be} to utf-16, utf-32 and # others to "". Used to test utf-16, utf-32 based # on system endianness @@ -881,19 +902,19 @@ proc testprofile {id converter enc profile data result args} { # Generates tests for compiled and uncompiled implementation. # Also generates utf-{16,32} tests if passed encoding is utf-{16,32}{le,be} # The enc and profile are appended to id to generate the test id -proc testfailindex {id converter enc data result {profile default}} { - testconvert $id.$enc.$profile "list \[encoding $converter -profile $profile -failindex idx $enc $data] \[set idx]" $result +proc testfailindex {id converter enc data result failidx {profile default}} { + testconvert $id.$enc.$profile "list \[encoding $converter -profile $profile -failindex idx $enc [list $data]\] \[set idx\]" [list $result $failidx] if {[set enc2 [endianUtf $enc]] ne ""} { # If utf{16,32}-{le,be}, also do utf{16,32} - testconvert $id.$enc2.$profile "list \[encoding $converter -profile $profile -failindex idx $enc2 $data] \[set idx]" $result + testconvert $id.$enc2.$profile "list \[encoding $converter -profile $profile -failindex idx $enc2 [list $data]\] \[set idx]" [list $result $failidx] } # If this is the default profile, generate a test without specifying profile if {$profile eq $::encDefaultProfile} { - testconvert $id.$enc.default "list \[encoding $converter -failindex idx $enc $data] \[set idx]" $result + testconvert $id.$enc.default "list \[encoding $converter -failindex idx $enc [list $data]\] \[set idx]" [list $result $failidx] if {[set enc2 [endianUtf $enc]] ne ""} { # If utf{16,32}-{le,be}, also do utf{16,32} - testconvert $id.$enc2.default "list \[encoding $converter -failindex idx $enc2 $data] \[set idx]" $result + testconvert $id.$enc2.default "list \[encoding $converter -failindex idx $enc2 [list $data]\] \[set idx]" [list $result $failidx] } } } @@ -962,10 +983,10 @@ foreach {enc str hex ctrl comment} $encValidStrings { set prefix_bytes [encoding convertto $enc A] set suffix_bytes [encoding convertto $enc B] foreach profile $encProfiles { - testfailindex cmdAH-4.3.13.$hex.solo convertfrom $enc $bytes [list $str -1] $profile - testfailindex cmdAH-4.3.13.$hex.lead convertfrom $enc $bytes$suffix_bytes [list $str$suffix -1] $profile - testfailindex cmdAH-4.3.13.$hex.tail convertfrom $enc $prefix_bytes$bytes [list $prefix$str -1] $profile - testfailindex cmdAH-4.3.13.$hex.middle convertfrom $enc $prefix_bytes$bytes$suffix_bytes [list $prefix$str$suffix -1] $profile + testprofile cmdAH-4.3.13.$hex.solo convertfrom $enc $profile $bytes $str + testprofile cmdAH-4.3.13.$hex.lead convertfrom $enc $profile $bytes$suffix_bytes $str$suffix + testprofile cmdAH-4.3.13.$hex.tail convertfrom $enc $profile $prefix_bytes$bytes $prefix$str + testprofile cmdAH-4.3.13.$hex.middle convertfrom $enc $profile $prefix_bytes$bytes$suffix_bytes $prefix$str$suffix } } @@ -1026,10 +1047,10 @@ foreach {enc str hex ctrl comment} $encValidStrings { set prefix_bytes [encoding convertto $enc $prefix] set suffix_bytes [encoding convertto $enc $suffix] foreach profile $encProfiles { - testfailindex cmdAH-4.3.14.$hex.solo convertfrom $enc $bytes [list $str -1] $profile - testfailindex cmdAH-4.3.14.$hex.lead convertfrom $enc $bytes$suffix_bytes [list $str$suffix -1] $profile - testfailindex cmdAH-4.3.14.$hex.tail convertfrom $enc $prefix_bytes$bytes [list $prefix$str -1] $profile - testfailindex cmdAH-4.3.14.$hex.middle convertfrom $enc $prefix_bytes$bytes$suffix_bytes [list $prefix$str$suffix -1] $profile + testfailindex cmdAH-4.3.14.$hex.solo convertfrom $enc $bytes $str -1 $profile + testfailindex cmdAH-4.3.14.$hex.lead convertfrom $enc $bytes$suffix_bytes $str$suffix -1 $profile + testfailindex cmdAH-4.3.14.$hex.tail convertfrom $enc $prefix_bytes$bytes $prefix$str -1 $profile + testfailindex cmdAH-4.3.14.$hex.middle convertfrom $enc $prefix_bytes$bytes$suffix_bytes $prefix$str$suffix -1 $profile } } @@ -1044,7 +1065,7 @@ foreach {enc hex profile str failidx ctrl comment} $encInvalidBytes { set suffix_bytes [encoding convertto $enc $suffix] set prefixLen [string length $prefix_bytes] if {$ctrl eq {} || "solo" in $ctrl} { - testfailindex cmdAH-4.3.14.$hex.solo convertfrom $enc $bytes [list $str $failidx] $profile + testfailindex cmdAH-4.3.14.$hex.solo convertfrom $enc $bytes $str $failidx $profile } if {$ctrl eq {} || "lead" in $ctrl} { if {$failidx == -1} { @@ -1054,7 +1075,7 @@ foreach {enc hex profile str failidx ctrl comment} $encInvalidBytes { # Failure expected set result "" } - testfailindex cmdAH-4.3.14.$hex.lead convertfrom $enc $bytes$suffix_bytes [list $result $failidx] $profile + testfailindex cmdAH-4.3.14.$hex.lead convertfrom $enc $bytes$suffix_bytes $result $failidx $profile } if {$ctrl eq {} || "tail" in $ctrl} { set expected_failidx $failidx @@ -1066,7 +1087,7 @@ foreach {enc hex profile str failidx ctrl comment} $encInvalidBytes { set result $prefix incr expected_failidx $prefixLen } - testfailindex cmdAH-4.3.14.$hex.tail convertfrom $enc $prefix_bytes$bytes [list $result $expected_failidx] $profile + testfailindex cmdAH-4.3.14.$hex.tail convertfrom $enc $prefix_bytes$bytes $result $expected_failidx $profile } if {$ctrl eq {} || "middle" in $ctrl} { set expected_failidx $failidx @@ -1078,7 +1099,7 @@ foreach {enc hex profile str failidx ctrl comment} $encInvalidBytes { set result $prefix incr expected_failidx $prefixLen } - testfailindex cmdAH-4.3.14.$hex.middle convertfrom $enc $prefix_bytes$bytes$suffix_bytes [list $result $expected_failidx] $profile + testfailindex cmdAH-4.3.14.$hex.middle convertfrom $enc $prefix_bytes$bytes$suffix_bytes $result $expected_failidx $profile } } @@ -1193,10 +1214,10 @@ foreach {enc str hex ctrl comment} $encValidStrings { set prefix_bytes [encoding convertto $enc A] set suffix_bytes [encoding convertto $enc B] foreach profile $encProfiles { - testfailindex cmdAH-4.4.14.$enc.$printable.solo convertto $enc $str [list $bytes -1] $profile - testfailindex cmdAH-4.4.14.$enc.$printable.lead convertto $enc $str$suffix [list $bytes$suffix_bytes -1] $profile - testfailindex cmdAH-4.4.14.$enc.$printable.tail convertto $enc $prefix$str [list $prefix_bytes$bytes -1] $profile - testfailindex cmdAH-4.4.14.$enc.$printable.middle convertto $enc $prefix$str$suffix [list $prefix_bytes$bytes$suffix_bytes -1] $profile + testfailindex cmdAH-4.4.14.$enc.$printable.solo convertto $enc $str $bytes -1 $profile + testfailindex cmdAH-4.4.14.$enc.$printable.lead convertto $enc $str$suffix $bytes$suffix_bytes -1 $profile + testfailindex cmdAH-4.4.14.$enc.$printable.tail convertto $enc $prefix$str $prefix_bytes$bytes -1 $profile + testfailindex cmdAH-4.4.14.$enc.$printable.middle convertto $enc $prefix$str$suffix $prefix_bytes$bytes$suffix_bytes -1 $profile } } @@ -1209,7 +1230,7 @@ foreach {enc str profile hex failidx ctrl comment} $encUnencodableStrings { set suffix B set prefixLen [string length [encoding convertto $enc $prefix]] if {$ctrl eq {} || "solo" in $ctrl} { - testfailindex cmdAH-4.4.14.$printable.solo convertto $enc $str [list $bytes $failidx] $profile + testfailindex cmdAH-4.4.14.$printable.solo convertto $enc $str $bytes $failidx $profile } if {$ctrl eq {} || "lead" in $ctrl} { if {$failidx == -1} { @@ -1219,7 +1240,7 @@ foreach {enc str profile hex failidx ctrl comment} $encUnencodableStrings { # Failure expected set result "" } - testfailindex cmdAH-4.4.14.$printable.lead convertto $enc $str$suffix [list $result $failidx] $profile + testfailindex cmdAH-4.4.14.$printable.lead convertto $enc $str$suffix $result $failidx $profile } if {$ctrl eq {} || "tail" in $ctrl} { set expected_failidx $failidx @@ -1231,7 +1252,7 @@ foreach {enc str profile hex failidx ctrl comment} $encUnencodableStrings { set result $prefix incr expected_failidx $prefixLen } - testfailindex cmdAH-4.4.14.$printable.tail convertto $enc $prefix$str [list $result $expected_failidx] $profile + testfailindex cmdAH-4.4.14.$printable.tail convertto $enc $prefix$str $result $expected_failidx $profile } if {$ctrl eq {} || "middle" in $ctrl} { set expected_failidx $failidx @@ -1243,7 +1264,7 @@ foreach {enc str profile hex failidx ctrl comment} $encUnencodableStrings { set result $prefix incr expected_failidx $prefixLen } - testfailindex cmdAH-4.4.14.$printable.middle convertto $enc $prefix$str$suffix [list $result $expected_failidx] $profile + testfailindex cmdAH-4.4.14.$printable.middle convertto $enc $prefix$str$suffix $result $expected_failidx $profile } } diff --git a/tools/ucm2tests.tcl b/tools/ucm2tests.tcl index 22ae529..e971631 100644 --- a/tools/ucm2tests.tcl +++ b/tools/ucm2tests.tcl @@ -1,14 +1,15 @@ # ucm2tests.tcl # # Parses given ucm files (from ICU) to generate test data -# for encodings. The generated scripts are written to stdout. +# for encodings. # -# tclsh ucmtotests.tcl PATH_TO_ICU_UCM_DIRECTORY +# tclsh ucm2tests.tcl PATH_TO_ICU_UCM_DIRECTORY ?OUTPUTPATH? # namespace eval ucm { # No means to change these currently but ... - variable outputChan stdout + variable outputPath + variable outputChan variable errorChan stderr variable verbose 0 @@ -24,6 +25,7 @@ namespace eval ucm { cp1256 glibc-CP1256-2.1.2 cp1257 glibc-CP1257-2.1.2 cp1258 glibc-CP1258-2.1.2 + gb1988 glibc-GB_1988_80-2.3.3 iso8859-1 glibc-ISO_8859_1-2.1.2 iso8859-2 glibc-ISO_8859_2-2.1.2 iso8859-3 glibc-ISO_8859_3-2.1.2 @@ -91,27 +93,99 @@ proc ucm::parse_SBCS {fd} { proc ucm::generate_tests {} { variable encNameMap variable charMap + variable outputPath + variable outputChan + + if {[info exists outputPath]} { + set outputChan [open $outputPath w] + } else { + set outputChan stdout + } array set tclNames {} foreach encName [encoding names] { set tclNames($encName) "" } - foreach encName [lsort [array names encNameMap]] { + + # Common procedures + print { +# This file is automatically generated by ucm2tests.tcl. +# Edits will be overwritten on next generation. +# +# Generates tests comparing Tcl encodings to ICU. +# The generated file is NOT standalone. It should be sourced into a test script. + +proc ucmConvertfromMismatches {enc map} { + set mismatches {} + foreach {unihex hex} $map { + set unich [subst "\\U$unihex"] + if {[encoding convertfrom -profile strict $enc [binary decode hex $hex]] ne $unich} { + lappend mismatches "<[printable $unich],$hex>" + } + } + return $mismatches +} +proc ucmConverttoMismatches {enc map} { + set mismatches {} + foreach {unihex hex} $map { + set unich [subst "\\U$unihex"] + if {[encoding convertto -profile strict $enc $unich] ne [binary decode hex $hex]} { + lappend mismatches "<[printable $unich],$hex>" + } + } + return $mismatches +} +if {[info commands printable] eq ""} { + proc printable {s} { + set print "" + foreach c [split $s ""] { + set i [scan $c %c] + if {[string is print $c] && ($i <= 127)} { + append print $c + } elseif {$i <= 0xff} { + append print \\x[format %02X $i] + } elseif {$i <= 0xffff} { + append print \\u[format %04X $i] + } else { + append print \\U[format %08X $i] + } + } + return $print + } +} + } + foreach encName [lsort -dictionary [array names encNameMap]] { if {![info exists charMap($encName)]} { warn "No character map read for $encName" continue } unset tclNames($encName) - print "\n# $encName (generated from $encNameMap($encName))" - print "lappend encValidStrings {*}{" - foreach {unich hex} $charMap($encName) { - print " $encName \\u$unich $hex {} {}" + + print "\n#\n# $encName (generated from $encNameMap($encName))" + print "\ntest encoding-convertfrom-ucmCompare-$encName {Compare against ICU UCM} -body \{" + print " ucmConvertfromMismatches $encName {$charMap($encName)}" + print "\} -result {}" + print "\ntest encoding-convertto-ucmCompare-$encName {Compare against ICU UCM} -body \{" + print " ucmConverttoMismatches $encName {$charMap($encName)}" + print "\} -result {}" + if {0} { + # This will generate individual tests for every char + # and test in lead, tail, middle, solo configurations + # but takes considerable time + print "lappend encValidStrings {*}{" + foreach {unich hex} $charMap($encName) { + print " $encName \\u$unich $hex {} {}" + } + print "}; # $encName" } - print "}; # $encName" } if {[array size tclNames]} { warn "Missing encoding: [lsort [array names tclNames]]" } + if {[info exists outputPath]} { + close $outputChan + unset outputChan + } } proc ucm::parse_file {encName ucmPath} { @@ -173,8 +247,13 @@ proc ucm::expand_paths {patterns} { proc ucm::run {} { variable encNameMap - if {[llength $::argv] != 1} { - abort "Usage: [info nameofexecutable] $::argv0 PATHTOUCMFILES" + variable outputPath + switch [llength $::argv] { + 2 {set outputPath [lindex $::argv 1]} + 1 {} + default { + abort "Usage: [info nameofexecutable] $::argv0 path/to/icu/ucm/data ?outputfile?" + } } foreach {encName fname} [array get encNameMap] { ucm::parse_file $encName [file join [lindex $::argv 0] ${fname}.ucm] -- cgit v0.12 From c606ae1574a7d66bcbf8666506e91840875f6d45 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Tue, 21 Feb 2023 17:50:36 +0000 Subject: Proposed fix for [d19fe0a5b]: Handling incomplete byte sequences for utf-16/utf-32 --- generic/tclEncoding.c | 27 ++++++++++++++++++++++++--- tests/encoding.test | 6 ++++++ 2 files changed, 30 insertions(+), 3 deletions(-) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index dfa7907..ecec6e9 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -2457,21 +2457,27 @@ UnicodeToUtfProc( } result = TCL_OK; - /* check alignment with utf-16 (2 == sizeof(UTF-16)) */ + /* + * Check alignment with utf-16 (2 == sizeof(UTF-16)) + */ + if ((srcLen % 2) != 0) { result = TCL_CONVERT_MULTIBYTE; srcLen--; } +#if TCL_UTF_MAX > 3 /* - * If last code point is a high surrogate, we cannot handle that yet. + * If last code point is a high surrogate, we cannot handle that yet, + * unless we are at the end. */ - if ((srcLen >= 2) && + if (!(flags & TCL_ENCODING_END) && (srcLen >= 2) && ((src[srcLen - (clientData?1:2)] & 0xFC) == 0xD8)) { result = TCL_CONVERT_MULTIBYTE; srcLen-= 2; } +#endif srcStart = src; srcEnd = src + srcLen; @@ -2504,6 +2510,21 @@ UnicodeToUtfProc( src += sizeof(unsigned short); } + if ((flags & TCL_ENCODING_END) && (result == TCL_CONVERT_MULTIBYTE)) { + /* We have a single byte left-over at the end */ + if (dst > dstEnd) { + result = TCL_CONVERT_NOSPACE; + } else { + /* destination is not full, so we really are at the end now */ + if (flags & TCL_ENCODING_STOPONERROR) { + result = TCL_CONVERT_SYNTAX; + } else { + dst += Tcl_UniCharToUtf(0xFFFD, dst); + numChars++; + src++; + } + } + } *srcReadPtr = src - srcStart; *dstWrotePtr = dst - dstStart; *dstCharsPtr = numChars; diff --git a/tests/encoding.test b/tests/encoding.test index f558e01..f6f9abc 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -419,6 +419,12 @@ test encoding-16.3 {UnicodeToUtfProc} -body { set val [encoding convertfrom unicode "\xDC\xDC"] list $val [format %X [scan $val %c]] } -result "\uDCDC DCDC" +test encoding-16.4 {UnicodeToUtfProc, bug [d19fe0a5b]} -body { + encoding convertfrom unicode "\x41\x41\x41" +} -result \u4141\uFFFD +test encoding-16.5 {UnicodeToUtfProc, bug [d19fe0a5b]} -constraints ucs2 -body { + encoding convertfrom unicode "\xD8\xD8" +} -result \uD8D8 test encoding-17.1 {UtfToUnicodeProc} -constraints fullutf -body { encoding convertto unicode "\U460DC" -- cgit v0.12 From f95599f4d4b6e502a92971909286a8ec6533c8c2 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Tue, 21 Feb 2023 23:16:14 +0000 Subject: More encoding fixes, e.g. range 0x80-0x9F for dingbats and symbol. Remove "-m" option from txt2enc.c tool, since the same is already handled in the table encoding code in Tcl itself. This was wat prevent Tcl to handle throwing exceptions correctly --- library/encoding/dingbats.enc | 4 ++-- library/encoding/ebcdic.enc | 1 + library/encoding/symbol.enc | 4 ++-- tools/encoding/Makefile | 2 +- tools/encoding/dingbats.txt | 1 + tools/encoding/gb1988.txt | 1 + tools/encoding/macTurkish.txt | 1 + tools/encoding/macUkraine.txt | 1 + tools/encoding/symbol.txt | 1 + tools/encoding/txt2enc.c | 14 +------------- 10 files changed, 12 insertions(+), 18 deletions(-) diff --git a/library/encoding/dingbats.enc b/library/encoding/dingbats.enc index 9729487..bd466b2 100644 --- a/library/encoding/dingbats.enc +++ b/library/encoding/dingbats.enc @@ -10,8 +10,8 @@ S 2730273127322733273427352736273727382739273A273B273C273D273E273F 2740274127422743274427452746274727482749274A274B25CF274D25A0274F 27502751275225B225BC25C6275625D727582759275A275B275C275D275E007F -0080008100820083008400850086008700880089008A008B008C008D008E008F -0090009100920093009400950096009700980099009A009B009C009D009E009F +0000000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000000000000000000000000000000000 0000276127622763276427652766276726632666266526602460246124622463 2464246524662467246824692776277727782779277A277B277C277D277E277F 2780278127822783278427852786278727882789278A278B278C278D278E278F diff --git a/library/encoding/ebcdic.enc b/library/encoding/ebcdic.enc index f451de5..f83ce7d 100644 --- a/library/encoding/ebcdic.enc +++ b/library/encoding/ebcdic.enc @@ -1,3 +1,4 @@ +# Encoding file: ebcdic, single-byte S 006F 0 1 00 diff --git a/library/encoding/symbol.enc b/library/encoding/symbol.enc index ffda9e3..ebd2f49 100644 --- a/library/encoding/symbol.enc +++ b/library/encoding/symbol.enc @@ -10,8 +10,8 @@ S 03A0039803A103A303A403A503C203A9039E03A80396005B2234005D22A5005F F8E503B103B203C703B403B503C603B303B703B903D503BA03BB03BC03BD03BF 03C003B803C103C303C403C503D603C903BE03C803B6007B007C007D223C007F -0080008100820083008400850086008700880089008A008B008C008D008E008F -0090009100920093009400950096009700980099009A009B009C009D009E009F +0000000000000000000000000000000000000000000000000000000000000000 +0000000000000000000000000000000000000000000000000000000000000000 000003D2203222642044221E0192266326662665266021942190219121922193 00B000B12033226500D7221D2202202200F72260226122482026F8E6F8E721B5 21352111211C21182297229522052229222A2283228722842282228622082209 diff --git a/tools/encoding/Makefile b/tools/encoding/Makefile index 7235b47..a2122d5 100644 --- a/tools/encoding/Makefile +++ b/tools/encoding/Makefile @@ -67,7 +67,7 @@ encodings: clean txt2enc $(EUC_ENCODINGS) @for p in *.txt; do \ enc=`echo $$p | sed 's/\..*$$/\.enc/'`; \ echo $$enc; \ - ./txt2enc -m $$p > $$enc; \ + ./txt2enc $$p > $$enc; \ done @echo @echo Compiling special versions of encoding files. diff --git a/tools/encoding/dingbats.txt b/tools/encoding/dingbats.txt index 334f8d6..93a6081 100644 --- a/tools/encoding/dingbats.txt +++ b/tools/encoding/dingbats.txt @@ -155,6 +155,7 @@ 0x7C 0x275C # HEAVY SINGLE COMMA QUOTATION MARK ORNAMENT 0x7D 0x275D # HEAVY DOUBLE TURNED COMMA QUOTATION MARK ORNAMENT 0x7E 0x275E # HEAVY DOUBLE COMMA QUOTATION MARK ORNAMENT +0x7F 0x275E # DELETE 0xA1 0x2761 # CURVED STEM PARAGRAPH SIGN ORNAMENT 0xA2 0x2762 # HEAVY EXCLAMATION MARK ORNAMENT 0xA3 0x2763 # HEAVY HEART EXCLAMATION MARK ORNAMENT diff --git a/tools/encoding/gb1988.txt b/tools/encoding/gb1988.txt index 800cd68..b9197e5 100644 --- a/tools/encoding/gb1988.txt +++ b/tools/encoding/gb1988.txt @@ -93,6 +93,7 @@ 0x7C 0x007C # VERTICAL LINE 0x7D 0x007D # RIGHT CURLY BRACKET 0x7E 0x203E # OVERLINE +0x7F 0x007F # DELETE 0xA1 0xFF61 # HALFWIDTH IDEOGRAPHIC FULL STOP 0xA2 0xFF62 # HALFWIDTH LEFT CORNER BRACKET 0xA3 0xFF63 # HALFWIDTH RIGHT CORNER BRACKET diff --git a/tools/encoding/macTurkish.txt b/tools/encoding/macTurkish.txt index 4a1ddab..ca3cda3 100644 --- a/tools/encoding/macTurkish.txt +++ b/tools/encoding/macTurkish.txt @@ -203,6 +203,7 @@ 0x7C 0x007C # VERTICAL LINE 0x7D 0x007D # RIGHT CURLY BRACKET 0x7E 0x007E # TILDE +0x7F 0x007F # DELETE # 0x80 0x00C4 # LATIN CAPITAL LETTER A WITH DIAERESIS 0x81 0x00C5 # LATIN CAPITAL LETTER A WITH RING ABOVE diff --git a/tools/encoding/macUkraine.txt b/tools/encoding/macUkraine.txt index dba4e10..dc07cdc 100644 --- a/tools/encoding/macUkraine.txt +++ b/tools/encoding/macUkraine.txt @@ -148,6 +148,7 @@ 0x7C 0x007C # VERTICAL LINE 0x7D 0x007D # RIGHT CURLY BRACKET 0x7E 0x007E # TILDE +0x7F 0x007F # DELETE # 0x80 0x0410 # CYRILLIC CAPITAL LETTER A 0x81 0x0411 # CYRILLIC CAPITAL LETTER BE diff --git a/tools/encoding/symbol.txt b/tools/encoding/symbol.txt index 12dcae6..13a3ed8 100644 --- a/tools/encoding/symbol.txt +++ b/tools/encoding/symbol.txt @@ -169,6 +169,7 @@ 0x7C 0x007C # VERTICAL LINE 0x7D 0x007D # RIGHT CURLY BRACKET 0x7E 0x223C # TILDE OPERATOR +0x7F 0x007F # DELETE 0xA1 0x03D2 # GREEK UPSILON WITH HOOK SYMBOL 0xA2 0x2032 # PRIME 0xA3 0x2264 # LESS-THAN OR EQUAL TO diff --git a/tools/encoding/txt2enc.c b/tools/encoding/txt2enc.c index 7ee797b..80b44b9 100644 --- a/tools/encoding/txt2enc.c +++ b/tools/encoding/txt2enc.c @@ -26,7 +26,7 @@ main(int argc, char **argv) { FILE *fp; Rune *toUnicode[256]; - int i, multiByte, enc, uni, hi, lo, fixmissing, used, maxEnc; + int i, multiByte, enc, uni, hi, lo, used, maxEnc; int ch, encColumn, uniColumn, fallbackKnown, width; char *fallbackString, *str, *rest, *dot; unsigned int magic, type, symbol, fallbackChar; @@ -43,7 +43,6 @@ main(int argc, char **argv) fallbackKnown = 0; type = -1; symbol = 0; - fixmissing = 1; opterr = 0; while (1) { @@ -89,10 +88,6 @@ main(int argc, char **argv) symbol = 1; break; - case 'm': - fixmissing = 0; - break; - default: goto usage; } @@ -207,13 +202,6 @@ main(int argc, char **argv) for (i = 0; i < 0x20; i++) { toUnicode[0][i] = i; } - if (fixmissing) { - for (i = 0x7F; i < 0xA0; i++) { - if (toUnicode[i] == NULL && toUnicode[0][i] == 0) { - toUnicode[0][i] = i; - } - } - } } printf("# Encoding file: %s, %s-byte\n", argv[argc - 1], typeString[type]); -- cgit v0.12 From a970bffd00117d4e762dfec90e21a94576da94fc Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Tue, 21 Feb 2023 23:22:45 +0000 Subject: Add 0x7F: DELETE to more mac* encodings (so re-generating doesn't remove it again) --- tools/encoding/macCentEuro.txt | 1 + tools/encoding/macCroatian.txt | 1 + tools/encoding/macCyrillic.txt | 1 + tools/encoding/macDingbats.txt | 1 + tools/encoding/macGreek.txt | 1 + tools/encoding/macIceland.txt | 1 + tools/encoding/macJapan.txt | 1 + tools/encoding/macRoman.txt | 1 + tools/encoding/macRomania.txt | 1 + tools/encoding/macThai.txt | 1 + 10 files changed, 10 insertions(+) diff --git a/tools/encoding/macCentEuro.txt b/tools/encoding/macCentEuro.txt index bf424c1..aa92908 100644 --- a/tools/encoding/macCentEuro.txt +++ b/tools/encoding/macCentEuro.txt @@ -188,6 +188,7 @@ 0x7C 0x007C # VERTICAL LINE 0x7D 0x007D # RIGHT CURLY BRACKET 0x7E 0x007E # TILDE +0x7F 0x007F # DELETE # 0x80 0x00C4 # LATIN CAPITAL LETTER A WITH DIAERESIS 0x81 0x0100 # LATIN CAPITAL LETTER A WITH MACRON diff --git a/tools/encoding/macCroatian.txt b/tools/encoding/macCroatian.txt index 538eda3..2eef246 100644 --- a/tools/encoding/macCroatian.txt +++ b/tools/encoding/macCroatian.txt @@ -216,6 +216,7 @@ 0x7C 0x007C # VERTICAL LINE 0x7D 0x007D # RIGHT CURLY BRACKET 0x7E 0x007E # TILDE +0x7F 0x007F # DELETE # 0x80 0x00C4 # LATIN CAPITAL LETTER A WITH DIAERESIS 0x81 0x00C5 # LATIN CAPITAL LETTER A WITH RING ABOVE diff --git a/tools/encoding/macCyrillic.txt b/tools/encoding/macCyrillic.txt index 695dade..2e9f8e2 100644 --- a/tools/encoding/macCyrillic.txt +++ b/tools/encoding/macCyrillic.txt @@ -213,6 +213,7 @@ 0x7C 0x007C # VERTICAL LINE 0x7D 0x007D # RIGHT CURLY BRACKET 0x7E 0x007E # TILDE +0x7F 0x007F # DELETE # 0x80 0x0410 # CYRILLIC CAPITAL LETTER A 0x81 0x0411 # CYRILLIC CAPITAL LETTER BE diff --git a/tools/encoding/macDingbats.txt b/tools/encoding/macDingbats.txt index 273d526..4b815f4 100644 --- a/tools/encoding/macDingbats.txt +++ b/tools/encoding/macDingbats.txt @@ -151,6 +151,7 @@ 0x7C 0x275C # HEAVY SINGLE COMMA QUOTATION MARK ORNAMENT 0x7D 0x275D # HEAVY DOUBLE TURNED COMMA QUOTATION MARK ORNAMENT 0x7E 0x275E # HEAVY DOUBLE COMMA QUOTATION MARK ORNAMENT +0x7F 0x007F # DELETE 0x80 0xF8D7 # medium left parenthesis ornament 0x81 0xF8D8 # medium right parenthesis ornament 0x82 0xF8D9 # medium flattened left parenthesis ornament diff --git a/tools/encoding/macGreek.txt b/tools/encoding/macGreek.txt index 9783259..b960d68 100644 --- a/tools/encoding/macGreek.txt +++ b/tools/encoding/macGreek.txt @@ -207,6 +207,7 @@ 0x7C 0x007C # VERTICAL LINE 0x7D 0x007D # RIGHT CURLY BRACKET 0x7E 0x007E # TILDE +0x7F 0x007F # DELETE # 0x80 0x00C4 # LATIN CAPITAL LETTER A WITH DIAERESIS 0x81 0x00B9 # SUPERSCRIPT ONE diff --git a/tools/encoding/macIceland.txt b/tools/encoding/macIceland.txt index 0a0b27b..c60b8d2 100644 --- a/tools/encoding/macIceland.txt +++ b/tools/encoding/macIceland.txt @@ -234,6 +234,7 @@ 0x7C 0x007C # VERTICAL LINE 0x7D 0x007D # RIGHT CURLY BRACKET 0x7E 0x007E # TILDE +0x7F 0x007F # DELETE # 0x80 0x00C4 # LATIN CAPITAL LETTER A WITH DIAERESIS 0x81 0x00C5 # LATIN CAPITAL LETTER A WITH RING ABOVE diff --git a/tools/encoding/macJapan.txt b/tools/encoding/macJapan.txt index 7121b3b..3c48c4a 100644 --- a/tools/encoding/macJapan.txt +++ b/tools/encoding/macJapan.txt @@ -318,6 +318,7 @@ 0x7C 0x007C # VERTICAL LINE 0x7D 0x007D # RIGHT CURLY BRACKET 0x7E 0x007E # TILDE # Apple change +0x7F 0x007F # DELETE # 0x8140 0x3000 # IDEOGRAPHIC SPACE 0x8141 0x3001 # IDEOGRAPHIC COMMA diff --git a/tools/encoding/macRoman.txt b/tools/encoding/macRoman.txt index 7ddcf8d..43ad44b 100644 --- a/tools/encoding/macRoman.txt +++ b/tools/encoding/macRoman.txt @@ -233,6 +233,7 @@ 0x7C 0x007C # VERTICAL LINE 0x7D 0x007D # RIGHT CURLY BRACKET 0x7E 0x007E # TILDE +0x7F 0x007F # DELETE # 0x80 0x00C4 # LATIN CAPITAL LETTER A WITH DIAERESIS 0x81 0x00C5 # LATIN CAPITAL LETTER A WITH RING ABOVE diff --git a/tools/encoding/macRomania.txt b/tools/encoding/macRomania.txt index 2a84adc..36a0b68 100644 --- a/tools/encoding/macRomania.txt +++ b/tools/encoding/macRomania.txt @@ -154,6 +154,7 @@ 0x7C 0x007C # VERTICAL LINE 0x7D 0x007D # RIGHT CURLY BRACKET 0x7E 0x007E # TILDE +0x7F 0x007F # DELETE # 0x80 0x00C4 # LATIN CAPITAL LETTER A WITH DIAERESIS 0x81 0x00C5 # LATIN CAPITAL LETTER A WITH RING ABOVE diff --git a/tools/encoding/macThai.txt b/tools/encoding/macThai.txt index b991833..2043621 100644 --- a/tools/encoding/macThai.txt +++ b/tools/encoding/macThai.txt @@ -168,6 +168,7 @@ 0x7C 0x007C # VERTICAL LINE 0x7D 0x007D # RIGHT CURLY BRACKET 0x7E 0x007E # TILDE +0x7F 0x007F # DELETE # 0x80 0x00AB # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK 0x81 0x00BB # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK -- cgit v0.12 From 78db448fff66d55223a88f8225976f4324de1b95 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Wed, 22 Feb 2023 13:31:41 +0000 Subject: Make txt2enc smarter, so we don't have to add DELETE any more in all original tables, better keep them as-is. --- tools/encoding/Makefile | 2 +- tools/encoding/ascii.txt | 1 - tools/encoding/big5.txt | 1 - tools/encoding/dingbats.txt | 1 - tools/encoding/gb1988.txt | 1 - tools/encoding/macCentEuro.txt | 1 - tools/encoding/macCroatian.txt | 1 - tools/encoding/macCyrillic.txt | 1 - tools/encoding/macDingbats.txt | 1 - tools/encoding/macGreek.txt | 1 - tools/encoding/macIceland.txt | 1 - tools/encoding/macJapan.txt | 1 - tools/encoding/macRoman.txt | 1 - tools/encoding/macRomania.txt | 1 - tools/encoding/macThai.txt | 1 - tools/encoding/macTurkish.txt | 1 - tools/encoding/macUkraine.txt | 1 - tools/encoding/symbol.txt | 1 - tools/encoding/txt2enc.c | 14 ++++++++++++-- 19 files changed, 13 insertions(+), 20 deletions(-) diff --git a/tools/encoding/Makefile b/tools/encoding/Makefile index a2122d5..ff19492 100644 --- a/tools/encoding/Makefile +++ b/tools/encoding/Makefile @@ -67,7 +67,7 @@ encodings: clean txt2enc $(EUC_ENCODINGS) @for p in *.txt; do \ enc=`echo $$p | sed 's/\..*$$/\.enc/'`; \ echo $$enc; \ - ./txt2enc $$p > $$enc; \ + ./txt2enc -e 0 -u 1 $$p > $$enc; \ done @echo @echo Compiling special versions of encoding files. diff --git a/tools/encoding/ascii.txt b/tools/encoding/ascii.txt index 2afbaab..66ba6f3 100644 --- a/tools/encoding/ascii.txt +++ b/tools/encoding/ascii.txt @@ -93,4 +93,3 @@ 0x7C 0x007C # VERTICAL LINE 0x7D 0x007D # RIGHT CURLY BRACKET 0x7E 0x007E # TILDE -0x7F 0x007F # DELETE \ No newline at end of file diff --git a/tools/encoding/big5.txt b/tools/encoding/big5.txt index 06b0fac..58cdfe2 100644 --- a/tools/encoding/big5.txt +++ b/tools/encoding/big5.txt @@ -185,7 +185,6 @@ 0x7C 0x007C # VERTICAL LINE 0x7D 0x007D # RIGHT CURLY BRACKET 0x7E 0x007E # TILDE -0x7F 0x007F # DELETE 0xA140 0x3000 # IDEOGRAPHIC SPACE 0xA141 0xFF0C # FULLWIDTH COMMA 0xA142 0x3001 # IDEOGRAPHIC COMMA diff --git a/tools/encoding/dingbats.txt b/tools/encoding/dingbats.txt index 93a6081..334f8d6 100644 --- a/tools/encoding/dingbats.txt +++ b/tools/encoding/dingbats.txt @@ -155,7 +155,6 @@ 0x7C 0x275C # HEAVY SINGLE COMMA QUOTATION MARK ORNAMENT 0x7D 0x275D # HEAVY DOUBLE TURNED COMMA QUOTATION MARK ORNAMENT 0x7E 0x275E # HEAVY DOUBLE COMMA QUOTATION MARK ORNAMENT -0x7F 0x275E # DELETE 0xA1 0x2761 # CURVED STEM PARAGRAPH SIGN ORNAMENT 0xA2 0x2762 # HEAVY EXCLAMATION MARK ORNAMENT 0xA3 0x2763 # HEAVY HEART EXCLAMATION MARK ORNAMENT diff --git a/tools/encoding/gb1988.txt b/tools/encoding/gb1988.txt index b9197e5..800cd68 100644 --- a/tools/encoding/gb1988.txt +++ b/tools/encoding/gb1988.txt @@ -93,7 +93,6 @@ 0x7C 0x007C # VERTICAL LINE 0x7D 0x007D # RIGHT CURLY BRACKET 0x7E 0x203E # OVERLINE -0x7F 0x007F # DELETE 0xA1 0xFF61 # HALFWIDTH IDEOGRAPHIC FULL STOP 0xA2 0xFF62 # HALFWIDTH LEFT CORNER BRACKET 0xA3 0xFF63 # HALFWIDTH RIGHT CORNER BRACKET diff --git a/tools/encoding/macCentEuro.txt b/tools/encoding/macCentEuro.txt index aa92908..bf424c1 100644 --- a/tools/encoding/macCentEuro.txt +++ b/tools/encoding/macCentEuro.txt @@ -188,7 +188,6 @@ 0x7C 0x007C # VERTICAL LINE 0x7D 0x007D # RIGHT CURLY BRACKET 0x7E 0x007E # TILDE -0x7F 0x007F # DELETE # 0x80 0x00C4 # LATIN CAPITAL LETTER A WITH DIAERESIS 0x81 0x0100 # LATIN CAPITAL LETTER A WITH MACRON diff --git a/tools/encoding/macCroatian.txt b/tools/encoding/macCroatian.txt index 2eef246..538eda3 100644 --- a/tools/encoding/macCroatian.txt +++ b/tools/encoding/macCroatian.txt @@ -216,7 +216,6 @@ 0x7C 0x007C # VERTICAL LINE 0x7D 0x007D # RIGHT CURLY BRACKET 0x7E 0x007E # TILDE -0x7F 0x007F # DELETE # 0x80 0x00C4 # LATIN CAPITAL LETTER A WITH DIAERESIS 0x81 0x00C5 # LATIN CAPITAL LETTER A WITH RING ABOVE diff --git a/tools/encoding/macCyrillic.txt b/tools/encoding/macCyrillic.txt index 2e9f8e2..695dade 100644 --- a/tools/encoding/macCyrillic.txt +++ b/tools/encoding/macCyrillic.txt @@ -213,7 +213,6 @@ 0x7C 0x007C # VERTICAL LINE 0x7D 0x007D # RIGHT CURLY BRACKET 0x7E 0x007E # TILDE -0x7F 0x007F # DELETE # 0x80 0x0410 # CYRILLIC CAPITAL LETTER A 0x81 0x0411 # CYRILLIC CAPITAL LETTER BE diff --git a/tools/encoding/macDingbats.txt b/tools/encoding/macDingbats.txt index 4b815f4..273d526 100644 --- a/tools/encoding/macDingbats.txt +++ b/tools/encoding/macDingbats.txt @@ -151,7 +151,6 @@ 0x7C 0x275C # HEAVY SINGLE COMMA QUOTATION MARK ORNAMENT 0x7D 0x275D # HEAVY DOUBLE TURNED COMMA QUOTATION MARK ORNAMENT 0x7E 0x275E # HEAVY DOUBLE COMMA QUOTATION MARK ORNAMENT -0x7F 0x007F # DELETE 0x80 0xF8D7 # medium left parenthesis ornament 0x81 0xF8D8 # medium right parenthesis ornament 0x82 0xF8D9 # medium flattened left parenthesis ornament diff --git a/tools/encoding/macGreek.txt b/tools/encoding/macGreek.txt index b960d68..9783259 100644 --- a/tools/encoding/macGreek.txt +++ b/tools/encoding/macGreek.txt @@ -207,7 +207,6 @@ 0x7C 0x007C # VERTICAL LINE 0x7D 0x007D # RIGHT CURLY BRACKET 0x7E 0x007E # TILDE -0x7F 0x007F # DELETE # 0x80 0x00C4 # LATIN CAPITAL LETTER A WITH DIAERESIS 0x81 0x00B9 # SUPERSCRIPT ONE diff --git a/tools/encoding/macIceland.txt b/tools/encoding/macIceland.txt index c60b8d2..0a0b27b 100644 --- a/tools/encoding/macIceland.txt +++ b/tools/encoding/macIceland.txt @@ -234,7 +234,6 @@ 0x7C 0x007C # VERTICAL LINE 0x7D 0x007D # RIGHT CURLY BRACKET 0x7E 0x007E # TILDE -0x7F 0x007F # DELETE # 0x80 0x00C4 # LATIN CAPITAL LETTER A WITH DIAERESIS 0x81 0x00C5 # LATIN CAPITAL LETTER A WITH RING ABOVE diff --git a/tools/encoding/macJapan.txt b/tools/encoding/macJapan.txt index 3c48c4a..7121b3b 100644 --- a/tools/encoding/macJapan.txt +++ b/tools/encoding/macJapan.txt @@ -318,7 +318,6 @@ 0x7C 0x007C # VERTICAL LINE 0x7D 0x007D # RIGHT CURLY BRACKET 0x7E 0x007E # TILDE # Apple change -0x7F 0x007F # DELETE # 0x8140 0x3000 # IDEOGRAPHIC SPACE 0x8141 0x3001 # IDEOGRAPHIC COMMA diff --git a/tools/encoding/macRoman.txt b/tools/encoding/macRoman.txt index 43ad44b..7ddcf8d 100644 --- a/tools/encoding/macRoman.txt +++ b/tools/encoding/macRoman.txt @@ -233,7 +233,6 @@ 0x7C 0x007C # VERTICAL LINE 0x7D 0x007D # RIGHT CURLY BRACKET 0x7E 0x007E # TILDE -0x7F 0x007F # DELETE # 0x80 0x00C4 # LATIN CAPITAL LETTER A WITH DIAERESIS 0x81 0x00C5 # LATIN CAPITAL LETTER A WITH RING ABOVE diff --git a/tools/encoding/macRomania.txt b/tools/encoding/macRomania.txt index 36a0b68..2a84adc 100644 --- a/tools/encoding/macRomania.txt +++ b/tools/encoding/macRomania.txt @@ -154,7 +154,6 @@ 0x7C 0x007C # VERTICAL LINE 0x7D 0x007D # RIGHT CURLY BRACKET 0x7E 0x007E # TILDE -0x7F 0x007F # DELETE # 0x80 0x00C4 # LATIN CAPITAL LETTER A WITH DIAERESIS 0x81 0x00C5 # LATIN CAPITAL LETTER A WITH RING ABOVE diff --git a/tools/encoding/macThai.txt b/tools/encoding/macThai.txt index 2043621..b991833 100644 --- a/tools/encoding/macThai.txt +++ b/tools/encoding/macThai.txt @@ -168,7 +168,6 @@ 0x7C 0x007C # VERTICAL LINE 0x7D 0x007D # RIGHT CURLY BRACKET 0x7E 0x007E # TILDE -0x7F 0x007F # DELETE # 0x80 0x00AB # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK 0x81 0x00BB # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK diff --git a/tools/encoding/macTurkish.txt b/tools/encoding/macTurkish.txt index ca3cda3..4a1ddab 100644 --- a/tools/encoding/macTurkish.txt +++ b/tools/encoding/macTurkish.txt @@ -203,7 +203,6 @@ 0x7C 0x007C # VERTICAL LINE 0x7D 0x007D # RIGHT CURLY BRACKET 0x7E 0x007E # TILDE -0x7F 0x007F # DELETE # 0x80 0x00C4 # LATIN CAPITAL LETTER A WITH DIAERESIS 0x81 0x00C5 # LATIN CAPITAL LETTER A WITH RING ABOVE diff --git a/tools/encoding/macUkraine.txt b/tools/encoding/macUkraine.txt index dc07cdc..dba4e10 100644 --- a/tools/encoding/macUkraine.txt +++ b/tools/encoding/macUkraine.txt @@ -148,7 +148,6 @@ 0x7C 0x007C # VERTICAL LINE 0x7D 0x007D # RIGHT CURLY BRACKET 0x7E 0x007E # TILDE -0x7F 0x007F # DELETE # 0x80 0x0410 # CYRILLIC CAPITAL LETTER A 0x81 0x0411 # CYRILLIC CAPITAL LETTER BE diff --git a/tools/encoding/symbol.txt b/tools/encoding/symbol.txt index 13a3ed8..12dcae6 100644 --- a/tools/encoding/symbol.txt +++ b/tools/encoding/symbol.txt @@ -169,7 +169,6 @@ 0x7C 0x007C # VERTICAL LINE 0x7D 0x007D # RIGHT CURLY BRACKET 0x7E 0x223C # TILDE OPERATOR -0x7F 0x007F # DELETE 0xA1 0x03D2 # GREEK UPSILON WITH HOOK SYMBOL 0xA2 0x2032 # PRIME 0xA3 0x2264 # LESS-THAN OR EQUAL TO diff --git a/tools/encoding/txt2enc.c b/tools/encoding/txt2enc.c index 80b44b9..32c7344 100644 --- a/tools/encoding/txt2enc.c +++ b/tools/encoding/txt2enc.c @@ -26,7 +26,7 @@ main(int argc, char **argv) { FILE *fp; Rune *toUnicode[256]; - int i, multiByte, enc, uni, hi, lo, used, maxEnc; + int i, multiByte, enc, uni, hi, lo, fixmissing, used, maxEnc; int ch, encColumn, uniColumn, fallbackKnown, width; char *fallbackString, *str, *rest, *dot; unsigned int magic, type, symbol, fallbackChar; @@ -43,6 +43,7 @@ main(int argc, char **argv) fallbackKnown = 0; type = -1; symbol = 0; + fixmissing = 1; opterr = 0; while (1) { @@ -88,6 +89,10 @@ main(int argc, char **argv) symbol = 1; break; + case 'm': + fixmissing = 0; + break; + default: goto usage; } @@ -101,7 +106,7 @@ main(int argc, char **argv) fputs(" -f\tfallback character (default: QUESTION MARK)\n", stderr); fputs(" -t\toverride implicit type with single, double, or multi\n", stderr); fputs(" -s\tsymbol+ascii encoding\n", stderr); - fputs(" -m\tdon't implicitly include range 0080 to 00FF\n", stderr); + fputs(" -m\tdon't implicitly include 007F\n", stderr); return 1; } @@ -202,6 +207,11 @@ main(int argc, char **argv) for (i = 0; i < 0x20; i++) { toUnicode[0][i] = i; } + if (fixmissing) { + if (toUnicode[0x7F] == NULL && toUnicode[0][0x7F] == 0) { + toUnicode[0][0x7F] = 0x7F; + } + } } printf("# Encoding file: %s, %s-byte\n", argv[argc - 1], typeString[type]); -- cgit v0.12 From 12345dfed8593e385a076594f4edcc545166d9ac Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Wed, 22 Feb 2023 13:34:04 +0000 Subject: re-generate macDingbats.enc, so it can now throw exceptions for the range 0x8E-0x9F --- library/encoding/macDingbats.enc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/library/encoding/macDingbats.enc b/library/encoding/macDingbats.enc index 28449cd..9fa47b5 100644 --- a/library/encoding/macDingbats.enc +++ b/library/encoding/macDingbats.enc @@ -10,8 +10,8 @@ S 2730273127322733273427352736273727382739273A273B273C273D273E273F 2740274127422743274427452746274727482749274A274B25CF274D25A0274F 27502751275225B225BC25C6275625D727582759275A275B275C275D275E007F -F8D7F8D8F8D9F8DAF8DBF8DCF8DDF8DEF8DFF8E0F8E1F8E2F8E3F8E4008E008F -0090009100920093009400950096009700980099009A009B009C009D009E009F +F8D7F8D8F8D9F8DAF8DBF8DCF8DDF8DEF8DFF8E0F8E1F8E2F8E3F8E400000000 +0000000000000000000000000000000000000000000000000000000000000000 0000276127622763276427652766276726632666266526602460246124622463 2464246524662467246824692776277727782779277A277B277C277D277E277F 2780278127822783278427852786278727882789278A278B278C278D278E278F -- cgit v0.12 From 293504812606130380d7240fddbbdc573b9dae8c Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Wed, 22 Feb 2023 13:42:55 +0000 Subject: Add ICU tests for unmapped characters. --- tests/cmdAH.test | 4 + tests/icuUcmTests.tcl | 1891 +++++++++++++++++++++++++++++++++++++++++++++++++ tools/ucm2tests.tcl | 156 +++- 3 files changed, 2017 insertions(+), 34 deletions(-) create mode 100644 tests/icuUcmTests.tcl diff --git a/tests/cmdAH.test b/tests/cmdAH.test index 3be2f14..cfde678 100644 --- a/tests/cmdAH.test +++ b/tests/cmdAH.test @@ -795,6 +795,10 @@ lappend encUnencodableStrings {*}{ utf-8 \uDC00 strict {} 0 {} High-surrogate } +# Generated tests comparing against ICU +# TODO - commented out for now as generating a lot of mismatches. +# source [file join [file dirname [info script]] icuUcmTests.tcl] + # Maps utf-{16,32}{le,be} to utf-16, utf-32 and # others to "". Used to test utf-16, utf-32 based # on system endianness diff --git a/tests/icuUcmTests.tcl b/tests/icuUcmTests.tcl new file mode 100644 index 0000000..0c4071f --- /dev/null +++ b/tests/icuUcmTests.tcl @@ -0,0 +1,1891 @@ + +# This file is automatically generated by ucm2tests.tcl. +# Edits will be overwritten on next generation. +# +# Generates tests comparing Tcl encodings to ICU. +# The generated file is NOT standalone. It should be sourced into a test script. + +proc ucmConvertfromMismatches {enc map} { + set mismatches {} + foreach {unihex hex} $map { + set unihex [string range 00000000$unihex end-7 end]; # Make 8 digits + set unich [subst "\\U$unihex"] + if {[encoding convertfrom -profile strict $enc [binary decode hex $hex]] ne $unich} { + lappend mismatches "<[printable $unich],$hex>" + } + } + return $mismatches +} +proc ucmConverttoMismatches {enc map} { + set mismatches {} + foreach {unihex hex} $map { + set unihex [string range 00000000$unihex end-7 end]; # Make 8 digits + set unich [subst "\\U$unihex"] + if {[encoding convertto -profile strict $enc $unich] ne [binary decode hex $hex]} { + lappend mismatches "<[printable $unich],$hex>" + } + } + return $mismatches +} +if {[info commands printable] eq ""} { + proc printable {s} { + set print "" + foreach c [split $s ""] { + set i [scan $c %c] + if {[string is print $c] && ($i <= 127)} { + append print $c + } elseif {$i <= 0xff} { + append print \\x[format %02X $i] + } elseif {$i <= 0xffff} { + append print \\u[format %04X $i] + } else { + append print \\U[format %08X $i] + } + } + return $print + } +} + + +# +# cp1250 (generated from glibc-CP1250-2.1.2) + +test encoding-convertfrom-ucmCompare-cp1250 {Compare against ICU UCM} -body { + ucmConvertfromMismatches cp1250 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0024 24 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007E 7E 007F 7F 00A0 A0 00A4 A4 00A6 A6 00A7 A7 00A8 A8 00A9 A9 00AB AB 00AC AC 00AD AD 00AE AE 00B0 B0 00B1 B1 00B4 B4 00B5 B5 00B6 B6 00B7 B7 00B8 B8 00BB BB 00C1 C1 00C2 C2 00C4 C4 00C7 C7 00C9 C9 00CB CB 00CD CD 00CE CE 00D3 D3 00D4 D4 00D6 D6 00D7 D7 00DA DA 00DC DC 00DD DD 00DF DF 00E1 E1 00E2 E2 00E4 E4 00E7 E7 00E9 E9 00EB EB 00ED ED 00EE EE 00F3 F3 00F4 F4 00F6 F6 00F7 F7 00FA FA 00FC FC 00FD FD 0102 C3 0103 E3 0104 A5 0105 B9 0106 C6 0107 E6 010C C8 010D E8 010E CF 010F EF 0110 D0 0111 F0 0118 CA 0119 EA 011A CC 011B EC 0139 C5 013A E5 013D BC 013E BE 0141 A3 0142 B3 0143 D1 0144 F1 0147 D2 0148 F2 0150 D5 0151 F5 0154 C0 0155 E0 0158 D8 0159 F8 015A 8C 015B 9C 015E AA 015F BA 0160 8A 0161 9A 0162 DE 0163 FE 0164 8D 0165 9D 016E D9 016F F9 0170 DB 0171 FB 0179 8F 017A 9F 017B AF 017C BF 017D 8E 017E 9E 02C7 A1 02D8 A2 02D9 FF 02DB B2 02DD BD 2013 96 2014 97 2018 91 2019 92 201A 82 201C 93 201D 94 201E 84 2020 86 2021 87 2022 95 2026 85 2030 89 2039 8B 203A 9B 20AC 80 2122 99} +} -result {} + +test encoding-convertto-ucmCompare-cp1250 {Compare against ICU UCM} -body { + ucmConverttoMismatches cp1250 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0024 24 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007E 7E 007F 7F 00A0 A0 00A4 A4 00A6 A6 00A7 A7 00A8 A8 00A9 A9 00AB AB 00AC AC 00AD AD 00AE AE 00B0 B0 00B1 B1 00B4 B4 00B5 B5 00B6 B6 00B7 B7 00B8 B8 00BB BB 00C1 C1 00C2 C2 00C4 C4 00C7 C7 00C9 C9 00CB CB 00CD CD 00CE CE 00D3 D3 00D4 D4 00D6 D6 00D7 D7 00DA DA 00DC DC 00DD DD 00DF DF 00E1 E1 00E2 E2 00E4 E4 00E7 E7 00E9 E9 00EB EB 00ED ED 00EE EE 00F3 F3 00F4 F4 00F6 F6 00F7 F7 00FA FA 00FC FC 00FD FD 0102 C3 0103 E3 0104 A5 0105 B9 0106 C6 0107 E6 010C C8 010D E8 010E CF 010F EF 0110 D0 0111 F0 0118 CA 0119 EA 011A CC 011B EC 0139 C5 013A E5 013D BC 013E BE 0141 A3 0142 B3 0143 D1 0144 F1 0147 D2 0148 F2 0150 D5 0151 F5 0154 C0 0155 E0 0158 D8 0159 F8 015A 8C 015B 9C 015E AA 015F BA 0160 8A 0161 9A 0162 DE 0163 FE 0164 8D 0165 9D 016E D9 016F F9 0170 DB 0171 FB 0179 8F 017A 9F 017B AF 017C BF 017D 8E 017E 9E 02C7 A1 02D8 A2 02D9 FF 02DB B2 02DD BD 2013 96 2014 97 2018 91 2019 92 201A 82 201C 93 201D 94 201E 84 2020 86 2021 87 2022 95 2026 85 2030 89 2039 8B 203A 9B 20AC 80 2122 99} +} -result {} + +# cp1250 - invalid byte sequences +lappend encInvalidBytes {*}{ + cp1250 81 tcl8 \U00000081 -1 {} {} + cp1250 81 replace \uFFFD -1 {} {} + cp1250 81 strict {} 0 {} {} + cp1250 83 tcl8 \U00000083 -1 {} {} + cp1250 83 replace \uFFFD -1 {} {} + cp1250 83 strict {} 0 {} {} + cp1250 88 tcl8 \U00000088 -1 {} {} + cp1250 88 replace \uFFFD -1 {} {} + cp1250 88 strict {} 0 {} {} + cp1250 90 tcl8 \U00000090 -1 {} {} + cp1250 90 replace \uFFFD -1 {} {} + cp1250 90 strict {} 0 {} {} + cp1250 98 tcl8 \U00000098 -1 {} {} + cp1250 98 replace \uFFFD -1 {} {} + cp1250 98 strict {} 0 {} {} +}; # cp1250 + +# cp1250 - invalid byte sequences +lappend encUnencodableStrings {*}{ + cp1250 \U00000080 tcl8 1A -1 {} {} + cp1250 \U00000080 replace 1A -1 {} {} + cp1250 \U00000080 strict {} 0 {} {} + cp1250 \U00000400 tcl8 1A -1 {} {} + cp1250 \U00000400 replace 1A -1 {} {} + cp1250 \U00000400 strict {} 0 {} {} + cp1250 \U0000D800 tcl8 1A -1 {} {} + cp1250 \U0000D800 replace 1A -1 {} {} + cp1250 \U0000D800 strict {} 0 {} {} + cp1250 \U0000DC00 tcl8 1A -1 {} {} + cp1250 \U0000DC00 replace 1A -1 {} {} + cp1250 \U0000DC00 strict {} 0 {} {} + cp1250 \U00010000 tcl8 1A -1 {} {} + cp1250 \U00010000 replace 1A -1 {} {} + cp1250 \U00010000 strict {} 0 {} {} + cp1250 \U0010FFFF tcl8 1A -1 {} {} + cp1250 \U0010FFFF replace 1A -1 {} {} + cp1250 \U0010FFFF strict {} 0 {} {} +}; # cp1250 + +# +# cp1251 (generated from glibc-CP1251-2.1.2) + +test encoding-convertfrom-ucmCompare-cp1251 {Compare against ICU UCM} -body { + ucmConvertfromMismatches cp1251 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0024 24 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007E 7E 007F 7F 00A0 A0 00A4 A4 00A6 A6 00A7 A7 00A9 A9 00AB AB 00AC AC 00AD AD 00AE AE 00B0 B0 00B1 B1 00B5 B5 00B6 B6 00B7 B7 00BB BB 0401 A8 0402 80 0403 81 0404 AA 0405 BD 0406 B2 0407 AF 0408 A3 0409 8A 040A 8C 040B 8E 040C 8D 040E A1 040F 8F 0410 C0 0411 C1 0412 C2 0413 C3 0414 C4 0415 C5 0416 C6 0417 C7 0418 C8 0419 C9 041A CA 041B CB 041C CC 041D CD 041E CE 041F CF 0420 D0 0421 D1 0422 D2 0423 D3 0424 D4 0425 D5 0426 D6 0427 D7 0428 D8 0429 D9 042A DA 042B DB 042C DC 042D DD 042E DE 042F DF 0430 E0 0431 E1 0432 E2 0433 E3 0434 E4 0435 E5 0436 E6 0437 E7 0438 E8 0439 E9 043A EA 043B EB 043C EC 043D ED 043E EE 043F EF 0440 F0 0441 F1 0442 F2 0443 F3 0444 F4 0445 F5 0446 F6 0447 F7 0448 F8 0449 F9 044A FA 044B FB 044C FC 044D FD 044E FE 044F FF 0451 B8 0452 90 0453 83 0454 BA 0455 BE 0456 B3 0457 BF 0458 BC 0459 9A 045A 9C 045B 9E 045C 9D 045E A2 045F 9F 0490 A5 0491 B4 2013 96 2014 97 2018 91 2019 92 201A 82 201C 93 201D 94 201E 84 2020 86 2021 87 2022 95 2026 85 2030 89 2039 8B 203A 9B 20AC 88 2116 B9 2122 99} +} -result {} + +test encoding-convertto-ucmCompare-cp1251 {Compare against ICU UCM} -body { + ucmConverttoMismatches cp1251 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0024 24 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007E 7E 007F 7F 00A0 A0 00A4 A4 00A6 A6 00A7 A7 00A9 A9 00AB AB 00AC AC 00AD AD 00AE AE 00B0 B0 00B1 B1 00B5 B5 00B6 B6 00B7 B7 00BB BB 0401 A8 0402 80 0403 81 0404 AA 0405 BD 0406 B2 0407 AF 0408 A3 0409 8A 040A 8C 040B 8E 040C 8D 040E A1 040F 8F 0410 C0 0411 C1 0412 C2 0413 C3 0414 C4 0415 C5 0416 C6 0417 C7 0418 C8 0419 C9 041A CA 041B CB 041C CC 041D CD 041E CE 041F CF 0420 D0 0421 D1 0422 D2 0423 D3 0424 D4 0425 D5 0426 D6 0427 D7 0428 D8 0429 D9 042A DA 042B DB 042C DC 042D DD 042E DE 042F DF 0430 E0 0431 E1 0432 E2 0433 E3 0434 E4 0435 E5 0436 E6 0437 E7 0438 E8 0439 E9 043A EA 043B EB 043C EC 043D ED 043E EE 043F EF 0440 F0 0441 F1 0442 F2 0443 F3 0444 F4 0445 F5 0446 F6 0447 F7 0448 F8 0449 F9 044A FA 044B FB 044C FC 044D FD 044E FE 044F FF 0451 B8 0452 90 0453 83 0454 BA 0455 BE 0456 B3 0457 BF 0458 BC 0459 9A 045A 9C 045B 9E 045C 9D 045E A2 045F 9F 0490 A5 0491 B4 2013 96 2014 97 2018 91 2019 92 201A 82 201C 93 201D 94 201E 84 2020 86 2021 87 2022 95 2026 85 2030 89 2039 8B 203A 9B 20AC 88 2116 B9 2122 99} +} -result {} + +# cp1251 - invalid byte sequences +lappend encInvalidBytes {*}{ + cp1251 98 tcl8 \U00000098 -1 {} {} + cp1251 98 replace \uFFFD -1 {} {} + cp1251 98 strict {} 0 {} {} +}; # cp1251 + +# cp1251 - invalid byte sequences +lappend encUnencodableStrings {*}{ + cp1251 \U00000080 tcl8 1A -1 {} {} + cp1251 \U00000080 replace 1A -1 {} {} + cp1251 \U00000080 strict {} 0 {} {} + cp1251 \U00000400 tcl8 1A -1 {} {} + cp1251 \U00000400 replace 1A -1 {} {} + cp1251 \U00000400 strict {} 0 {} {} + cp1251 \U0000D800 tcl8 1A -1 {} {} + cp1251 \U0000D800 replace 1A -1 {} {} + cp1251 \U0000D800 strict {} 0 {} {} + cp1251 \U0000DC00 tcl8 1A -1 {} {} + cp1251 \U0000DC00 replace 1A -1 {} {} + cp1251 \U0000DC00 strict {} 0 {} {} + cp1251 \U00010000 tcl8 1A -1 {} {} + cp1251 \U00010000 replace 1A -1 {} {} + cp1251 \U00010000 strict {} 0 {} {} + cp1251 \U0010FFFF tcl8 1A -1 {} {} + cp1251 \U0010FFFF replace 1A -1 {} {} + cp1251 \U0010FFFF strict {} 0 {} {} +}; # cp1251 + +# +# cp1252 (generated from glibc-CP1252-2.1.2) + +test encoding-convertfrom-ucmCompare-cp1252 {Compare against ICU UCM} -body { + ucmConvertfromMismatches cp1252 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0024 24 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007E 7E 007F 7F 00A0 A0 00A1 A1 00A2 A2 00A3 A3 00A4 A4 00A5 A5 00A6 A6 00A7 A7 00A8 A8 00A9 A9 00AA AA 00AB AB 00AC AC 00AD AD 00AE AE 00AF AF 00B0 B0 00B1 B1 00B2 B2 00B3 B3 00B4 B4 00B5 B5 00B6 B6 00B7 B7 00B8 B8 00B9 B9 00BA BA 00BB BB 00BC BC 00BD BD 00BE BE 00BF BF 00C0 C0 00C1 C1 00C2 C2 00C3 C3 00C4 C4 00C5 C5 00C6 C6 00C7 C7 00C8 C8 00C9 C9 00CA CA 00CB CB 00CC CC 00CD CD 00CE CE 00CF CF 00D0 D0 00D1 D1 00D2 D2 00D3 D3 00D4 D4 00D5 D5 00D6 D6 00D7 D7 00D8 D8 00D9 D9 00DA DA 00DB DB 00DC DC 00DD DD 00DE DE 00DF DF 00E0 E0 00E1 E1 00E2 E2 00E3 E3 00E4 E4 00E5 E5 00E6 E6 00E7 E7 00E8 E8 00E9 E9 00EA EA 00EB EB 00EC EC 00ED ED 00EE EE 00EF EF 00F0 F0 00F1 F1 00F2 F2 00F3 F3 00F4 F4 00F5 F5 00F6 F6 00F7 F7 00F8 F8 00F9 F9 00FA FA 00FB FB 00FC FC 00FD FD 00FE FE 00FF FF 0152 8C 0153 9C 0160 8A 0161 9A 0178 9F 017D 8E 017E 9E 0192 83 02C6 88 02DC 98 2013 96 2014 97 2018 91 2019 92 201A 82 201C 93 201D 94 201E 84 2020 86 2021 87 2022 95 2026 85 2030 89 2039 8B 203A 9B 20AC 80 2122 99} +} -result {} + +test encoding-convertto-ucmCompare-cp1252 {Compare against ICU UCM} -body { + ucmConverttoMismatches cp1252 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0024 24 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007E 7E 007F 7F 00A0 A0 00A1 A1 00A2 A2 00A3 A3 00A4 A4 00A5 A5 00A6 A6 00A7 A7 00A8 A8 00A9 A9 00AA AA 00AB AB 00AC AC 00AD AD 00AE AE 00AF AF 00B0 B0 00B1 B1 00B2 B2 00B3 B3 00B4 B4 00B5 B5 00B6 B6 00B7 B7 00B8 B8 00B9 B9 00BA BA 00BB BB 00BC BC 00BD BD 00BE BE 00BF BF 00C0 C0 00C1 C1 00C2 C2 00C3 C3 00C4 C4 00C5 C5 00C6 C6 00C7 C7 00C8 C8 00C9 C9 00CA CA 00CB CB 00CC CC 00CD CD 00CE CE 00CF CF 00D0 D0 00D1 D1 00D2 D2 00D3 D3 00D4 D4 00D5 D5 00D6 D6 00D7 D7 00D8 D8 00D9 D9 00DA DA 00DB DB 00DC DC 00DD DD 00DE DE 00DF DF 00E0 E0 00E1 E1 00E2 E2 00E3 E3 00E4 E4 00E5 E5 00E6 E6 00E7 E7 00E8 E8 00E9 E9 00EA EA 00EB EB 00EC EC 00ED ED 00EE EE 00EF EF 00F0 F0 00F1 F1 00F2 F2 00F3 F3 00F4 F4 00F5 F5 00F6 F6 00F7 F7 00F8 F8 00F9 F9 00FA FA 00FB FB 00FC FC 00FD FD 00FE FE 00FF FF 0152 8C 0153 9C 0160 8A 0161 9A 0178 9F 017D 8E 017E 9E 0192 83 02C6 88 02DC 98 2013 96 2014 97 2018 91 2019 92 201A 82 201C 93 201D 94 201E 84 2020 86 2021 87 2022 95 2026 85 2030 89 2039 8B 203A 9B 20AC 80 2122 99} +} -result {} + +# cp1252 - invalid byte sequences +lappend encInvalidBytes {*}{ + cp1252 81 tcl8 \U00000081 -1 {} {} + cp1252 81 replace \uFFFD -1 {} {} + cp1252 81 strict {} 0 {} {} + cp1252 8D tcl8 \U0000008D -1 {} {} + cp1252 8D replace \uFFFD -1 {} {} + cp1252 8D strict {} 0 {} {} + cp1252 8F tcl8 \U0000008F -1 {} {} + cp1252 8F replace \uFFFD -1 {} {} + cp1252 8F strict {} 0 {} {} + cp1252 90 tcl8 \U00000090 -1 {} {} + cp1252 90 replace \uFFFD -1 {} {} + cp1252 90 strict {} 0 {} {} + cp1252 9D tcl8 \U0000009D -1 {} {} + cp1252 9D replace \uFFFD -1 {} {} + cp1252 9D strict {} 0 {} {} +}; # cp1252 + +# cp1252 - invalid byte sequences +lappend encUnencodableStrings {*}{ + cp1252 \U00000080 tcl8 1A -1 {} {} + cp1252 \U00000080 replace 1A -1 {} {} + cp1252 \U00000080 strict {} 0 {} {} + cp1252 \U00000400 tcl8 1A -1 {} {} + cp1252 \U00000400 replace 1A -1 {} {} + cp1252 \U00000400 strict {} 0 {} {} + cp1252 \U0000D800 tcl8 1A -1 {} {} + cp1252 \U0000D800 replace 1A -1 {} {} + cp1252 \U0000D800 strict {} 0 {} {} + cp1252 \U0000DC00 tcl8 1A -1 {} {} + cp1252 \U0000DC00 replace 1A -1 {} {} + cp1252 \U0000DC00 strict {} 0 {} {} + cp1252 \U00010000 tcl8 1A -1 {} {} + cp1252 \U00010000 replace 1A -1 {} {} + cp1252 \U00010000 strict {} 0 {} {} + cp1252 \U0010FFFF tcl8 1A -1 {} {} + cp1252 \U0010FFFF replace 1A -1 {} {} + cp1252 \U0010FFFF strict {} 0 {} {} +}; # cp1252 + +# +# cp1253 (generated from glibc-CP1253-2.1.2) + +test encoding-convertfrom-ucmCompare-cp1253 {Compare against ICU UCM} -body { + ucmConvertfromMismatches cp1253 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0024 24 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007E 7E 007F 7F 00A0 A0 00A3 A3 00A4 A4 00A5 A5 00A6 A6 00A7 A7 00A8 A8 00A9 A9 00AB AB 00AC AC 00AD AD 00AE AE 00B0 B0 00B1 B1 00B2 B2 00B3 B3 00B5 B5 00B6 B6 00B7 B7 00BB BB 00BD BD 0192 83 0384 B4 0385 A1 0386 A2 0388 B8 0389 B9 038A BA 038C BC 038E BE 038F BF 0390 C0 0391 C1 0392 C2 0393 C3 0394 C4 0395 C5 0396 C6 0397 C7 0398 C8 0399 C9 039A CA 039B CB 039C CC 039D CD 039E CE 039F CF 03A0 D0 03A1 D1 03A3 D3 03A4 D4 03A5 D5 03A6 D6 03A7 D7 03A8 D8 03A9 D9 03AA DA 03AB DB 03AC DC 03AD DD 03AE DE 03AF DF 03B0 E0 03B1 E1 03B2 E2 03B3 E3 03B4 E4 03B5 E5 03B6 E6 03B7 E7 03B8 E8 03B9 E9 03BA EA 03BB EB 03BC EC 03BD ED 03BE EE 03BF EF 03C0 F0 03C1 F1 03C2 F2 03C3 F3 03C4 F4 03C5 F5 03C6 F6 03C7 F7 03C8 F8 03C9 F9 03CA FA 03CB FB 03CC FC 03CD FD 03CE FE 2013 96 2014 97 2015 AF 2018 91 2019 92 201A 82 201C 93 201D 94 201E 84 2020 86 2021 87 2022 95 2026 85 2030 89 2039 8B 203A 9B 20AC 80 2122 99} +} -result {} + +test encoding-convertto-ucmCompare-cp1253 {Compare against ICU UCM} -body { + ucmConverttoMismatches cp1253 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0024 24 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007E 7E 007F 7F 00A0 A0 00A3 A3 00A4 A4 00A5 A5 00A6 A6 00A7 A7 00A8 A8 00A9 A9 00AB AB 00AC AC 00AD AD 00AE AE 00B0 B0 00B1 B1 00B2 B2 00B3 B3 00B5 B5 00B6 B6 00B7 B7 00BB BB 00BD BD 0192 83 0384 B4 0385 A1 0386 A2 0388 B8 0389 B9 038A BA 038C BC 038E BE 038F BF 0390 C0 0391 C1 0392 C2 0393 C3 0394 C4 0395 C5 0396 C6 0397 C7 0398 C8 0399 C9 039A CA 039B CB 039C CC 039D CD 039E CE 039F CF 03A0 D0 03A1 D1 03A3 D3 03A4 D4 03A5 D5 03A6 D6 03A7 D7 03A8 D8 03A9 D9 03AA DA 03AB DB 03AC DC 03AD DD 03AE DE 03AF DF 03B0 E0 03B1 E1 03B2 E2 03B3 E3 03B4 E4 03B5 E5 03B6 E6 03B7 E7 03B8 E8 03B9 E9 03BA EA 03BB EB 03BC EC 03BD ED 03BE EE 03BF EF 03C0 F0 03C1 F1 03C2 F2 03C3 F3 03C4 F4 03C5 F5 03C6 F6 03C7 F7 03C8 F8 03C9 F9 03CA FA 03CB FB 03CC FC 03CD FD 03CE FE 2013 96 2014 97 2015 AF 2018 91 2019 92 201A 82 201C 93 201D 94 201E 84 2020 86 2021 87 2022 95 2026 85 2030 89 2039 8B 203A 9B 20AC 80 2122 99} +} -result {} + +# cp1253 - invalid byte sequences +lappend encInvalidBytes {*}{ + cp1253 81 tcl8 \U00000081 -1 {} {} + cp1253 81 replace \uFFFD -1 {} {} + cp1253 81 strict {} 0 {} {} + cp1253 88 tcl8 \U00000088 -1 {} {} + cp1253 88 replace \uFFFD -1 {} {} + cp1253 88 strict {} 0 {} {} + cp1253 8A tcl8 \U0000008A -1 {} {} + cp1253 8A replace \uFFFD -1 {} {} + cp1253 8A strict {} 0 {} {} + cp1253 8C tcl8 \U0000008C -1 {} {} + cp1253 8C replace \uFFFD -1 {} {} + cp1253 8C strict {} 0 {} {} + cp1253 8D tcl8 \U0000008D -1 {} {} + cp1253 8D replace \uFFFD -1 {} {} + cp1253 8D strict {} 0 {} {} + cp1253 8E tcl8 \U0000008E -1 {} {} + cp1253 8E replace \uFFFD -1 {} {} + cp1253 8E strict {} 0 {} {} + cp1253 8F tcl8 \U0000008F -1 {} {} + cp1253 8F replace \uFFFD -1 {} {} + cp1253 8F strict {} 0 {} {} + cp1253 90 tcl8 \U00000090 -1 {} {} + cp1253 90 replace \uFFFD -1 {} {} + cp1253 90 strict {} 0 {} {} + cp1253 98 tcl8 \U00000098 -1 {} {} + cp1253 98 replace \uFFFD -1 {} {} + cp1253 98 strict {} 0 {} {} + cp1253 9A tcl8 \U0000009A -1 {} {} + cp1253 9A replace \uFFFD -1 {} {} + cp1253 9A strict {} 0 {} {} + cp1253 9C tcl8 \U0000009C -1 {} {} + cp1253 9C replace \uFFFD -1 {} {} + cp1253 9C strict {} 0 {} {} + cp1253 9D tcl8 \U0000009D -1 {} {} + cp1253 9D replace \uFFFD -1 {} {} + cp1253 9D strict {} 0 {} {} + cp1253 9E tcl8 \U0000009E -1 {} {} + cp1253 9E replace \uFFFD -1 {} {} + cp1253 9E strict {} 0 {} {} + cp1253 9F tcl8 \U0000009F -1 {} {} + cp1253 9F replace \uFFFD -1 {} {} + cp1253 9F strict {} 0 {} {} + cp1253 AA tcl8 \U000000AA -1 {} {} + cp1253 AA replace \uFFFD -1 {} {} + cp1253 AA strict {} 0 {} {} + cp1253 D2 tcl8 \U000000D2 -1 {} {} + cp1253 D2 replace \uFFFD -1 {} {} + cp1253 D2 strict {} 0 {} {} + cp1253 FF tcl8 \U000000FF -1 {} {} + cp1253 FF replace \uFFFD -1 {} {} + cp1253 FF strict {} 0 {} {} +}; # cp1253 + +# cp1253 - invalid byte sequences +lappend encUnencodableStrings {*}{ + cp1253 \U00000080 tcl8 1A -1 {} {} + cp1253 \U00000080 replace 1A -1 {} {} + cp1253 \U00000080 strict {} 0 {} {} + cp1253 \U00000400 tcl8 1A -1 {} {} + cp1253 \U00000400 replace 1A -1 {} {} + cp1253 \U00000400 strict {} 0 {} {} + cp1253 \U0000D800 tcl8 1A -1 {} {} + cp1253 \U0000D800 replace 1A -1 {} {} + cp1253 \U0000D800 strict {} 0 {} {} + cp1253 \U0000DC00 tcl8 1A -1 {} {} + cp1253 \U0000DC00 replace 1A -1 {} {} + cp1253 \U0000DC00 strict {} 0 {} {} + cp1253 \U00010000 tcl8 1A -1 {} {} + cp1253 \U00010000 replace 1A -1 {} {} + cp1253 \U00010000 strict {} 0 {} {} + cp1253 \U0010FFFF tcl8 1A -1 {} {} + cp1253 \U0010FFFF replace 1A -1 {} {} + cp1253 \U0010FFFF strict {} 0 {} {} +}; # cp1253 + +# +# cp1254 (generated from glibc-CP1254-2.1.2) + +test encoding-convertfrom-ucmCompare-cp1254 {Compare against ICU UCM} -body { + ucmConvertfromMismatches cp1254 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0024 24 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007E 7E 007F 7F 00A0 A0 00A1 A1 00A2 A2 00A3 A3 00A4 A4 00A5 A5 00A6 A6 00A7 A7 00A8 A8 00A9 A9 00AA AA 00AB AB 00AC AC 00AD AD 00AE AE 00AF AF 00B0 B0 00B1 B1 00B2 B2 00B3 B3 00B4 B4 00B5 B5 00B6 B6 00B7 B7 00B8 B8 00B9 B9 00BA BA 00BB BB 00BC BC 00BD BD 00BE BE 00BF BF 00C0 C0 00C1 C1 00C2 C2 00C3 C3 00C4 C4 00C5 C5 00C6 C6 00C7 C7 00C8 C8 00C9 C9 00CA CA 00CB CB 00CC CC 00CD CD 00CE CE 00CF CF 00D1 D1 00D2 D2 00D3 D3 00D4 D4 00D5 D5 00D6 D6 00D7 D7 00D8 D8 00D9 D9 00DA DA 00DB DB 00DC DC 00DF DF 00E0 E0 00E1 E1 00E2 E2 00E3 E3 00E4 E4 00E5 E5 00E6 E6 00E7 E7 00E8 E8 00E9 E9 00EA EA 00EB EB 00EC EC 00ED ED 00EE EE 00EF EF 00F1 F1 00F2 F2 00F3 F3 00F4 F4 00F5 F5 00F6 F6 00F7 F7 00F8 F8 00F9 F9 00FA FA 00FB FB 00FC FC 00FF FF 011E D0 011F F0 0130 DD 0131 FD 0152 8C 0153 9C 015E DE 015F FE 0160 8A 0161 9A 0178 9F 0192 83 02C6 88 02DC 98 2013 96 2014 97 2018 91 2019 92 201A 82 201C 93 201D 94 201E 84 2020 86 2021 87 2022 95 2026 85 2030 89 2039 8B 203A 9B 20AC 80 2122 99} +} -result {} + +test encoding-convertto-ucmCompare-cp1254 {Compare against ICU UCM} -body { + ucmConverttoMismatches cp1254 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0024 24 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007E 7E 007F 7F 00A0 A0 00A1 A1 00A2 A2 00A3 A3 00A4 A4 00A5 A5 00A6 A6 00A7 A7 00A8 A8 00A9 A9 00AA AA 00AB AB 00AC AC 00AD AD 00AE AE 00AF AF 00B0 B0 00B1 B1 00B2 B2 00B3 B3 00B4 B4 00B5 B5 00B6 B6 00B7 B7 00B8 B8 00B9 B9 00BA BA 00BB BB 00BC BC 00BD BD 00BE BE 00BF BF 00C0 C0 00C1 C1 00C2 C2 00C3 C3 00C4 C4 00C5 C5 00C6 C6 00C7 C7 00C8 C8 00C9 C9 00CA CA 00CB CB 00CC CC 00CD CD 00CE CE 00CF CF 00D1 D1 00D2 D2 00D3 D3 00D4 D4 00D5 D5 00D6 D6 00D7 D7 00D8 D8 00D9 D9 00DA DA 00DB DB 00DC DC 00DF DF 00E0 E0 00E1 E1 00E2 E2 00E3 E3 00E4 E4 00E5 E5 00E6 E6 00E7 E7 00E8 E8 00E9 E9 00EA EA 00EB EB 00EC EC 00ED ED 00EE EE 00EF EF 00F1 F1 00F2 F2 00F3 F3 00F4 F4 00F5 F5 00F6 F6 00F7 F7 00F8 F8 00F9 F9 00FA FA 00FB FB 00FC FC 00FF FF 011E D0 011F F0 0130 DD 0131 FD 0152 8C 0153 9C 015E DE 015F FE 0160 8A 0161 9A 0178 9F 0192 83 02C6 88 02DC 98 2013 96 2014 97 2018 91 2019 92 201A 82 201C 93 201D 94 201E 84 2020 86 2021 87 2022 95 2026 85 2030 89 2039 8B 203A 9B 20AC 80 2122 99} +} -result {} + +# cp1254 - invalid byte sequences +lappend encInvalidBytes {*}{ + cp1254 81 tcl8 \U00000081 -1 {} {} + cp1254 81 replace \uFFFD -1 {} {} + cp1254 81 strict {} 0 {} {} + cp1254 8D tcl8 \U0000008D -1 {} {} + cp1254 8D replace \uFFFD -1 {} {} + cp1254 8D strict {} 0 {} {} + cp1254 8E tcl8 \U0000008E -1 {} {} + cp1254 8E replace \uFFFD -1 {} {} + cp1254 8E strict {} 0 {} {} + cp1254 8F tcl8 \U0000008F -1 {} {} + cp1254 8F replace \uFFFD -1 {} {} + cp1254 8F strict {} 0 {} {} + cp1254 90 tcl8 \U00000090 -1 {} {} + cp1254 90 replace \uFFFD -1 {} {} + cp1254 90 strict {} 0 {} {} + cp1254 9D tcl8 \U0000009D -1 {} {} + cp1254 9D replace \uFFFD -1 {} {} + cp1254 9D strict {} 0 {} {} + cp1254 9E tcl8 \U0000009E -1 {} {} + cp1254 9E replace \uFFFD -1 {} {} + cp1254 9E strict {} 0 {} {} +}; # cp1254 + +# cp1254 - invalid byte sequences +lappend encUnencodableStrings {*}{ + cp1254 \U00000080 tcl8 1A -1 {} {} + cp1254 \U00000080 replace 1A -1 {} {} + cp1254 \U00000080 strict {} 0 {} {} + cp1254 \U00000400 tcl8 1A -1 {} {} + cp1254 \U00000400 replace 1A -1 {} {} + cp1254 \U00000400 strict {} 0 {} {} + cp1254 \U0000D800 tcl8 1A -1 {} {} + cp1254 \U0000D800 replace 1A -1 {} {} + cp1254 \U0000D800 strict {} 0 {} {} + cp1254 \U0000DC00 tcl8 1A -1 {} {} + cp1254 \U0000DC00 replace 1A -1 {} {} + cp1254 \U0000DC00 strict {} 0 {} {} + cp1254 \U00010000 tcl8 1A -1 {} {} + cp1254 \U00010000 replace 1A -1 {} {} + cp1254 \U00010000 strict {} 0 {} {} + cp1254 \U0010FFFF tcl8 1A -1 {} {} + cp1254 \U0010FFFF replace 1A -1 {} {} + cp1254 \U0010FFFF strict {} 0 {} {} +}; # cp1254 + +# +# cp1255 (generated from glibc-CP1255-2.1.2) + +test encoding-convertfrom-ucmCompare-cp1255 {Compare against ICU UCM} -body { + ucmConvertfromMismatches cp1255 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0024 24 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007E 7E 007F 7F 00A0 A0 00A1 A1 00A2 A2 00A3 A3 00A5 A5 00A6 A6 00A7 A7 00A8 A8 00A9 A9 00AB AB 00AC AC 00AD AD 00AE AE 00AF AF 00B0 B0 00B1 B1 00B2 B2 00B3 B3 00B4 B4 00B5 B5 00B6 B6 00B7 B7 00B8 B8 00B9 B9 00BB BB 00BC BC 00BD BD 00BE BE 00BF BF 00D7 AA 00F7 BA 0192 83 02C6 88 02DC 98 05B0 C0 05B1 C1 05B2 C2 05B3 C3 05B4 C4 05B5 C5 05B6 C6 05B7 C7 05B8 C8 05B9 C9 05BB CB 05BC CC 05BD CD 05BE CE 05BF CF 05C0 D0 05C1 D1 05C2 D2 05C3 D3 05D0 E0 05D1 E1 05D2 E2 05D3 E3 05D4 E4 05D5 E5 05D6 E6 05D7 E7 05D8 E8 05D9 E9 05DA EA 05DB EB 05DC EC 05DD ED 05DE EE 05DF EF 05E0 F0 05E1 F1 05E2 F2 05E3 F3 05E4 F4 05E5 F5 05E6 F6 05E7 F7 05E8 F8 05E9 F9 05EA FA 05F0 D4 05F1 D5 05F2 D6 05F3 D7 05F4 D8 200E FD 200F FE 2013 96 2014 97 2018 91 2019 92 201A 82 201C 93 201D 94 201E 84 2020 86 2021 87 2022 95 2026 85 2030 89 2039 8B 203A 9B 20AA A4 20AC 80 2122 99} +} -result {} + +test encoding-convertto-ucmCompare-cp1255 {Compare against ICU UCM} -body { + ucmConverttoMismatches cp1255 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0024 24 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007E 7E 007F 7F 00A0 A0 00A1 A1 00A2 A2 00A3 A3 00A5 A5 00A6 A6 00A7 A7 00A8 A8 00A9 A9 00AB AB 00AC AC 00AD AD 00AE AE 00AF AF 00B0 B0 00B1 B1 00B2 B2 00B3 B3 00B4 B4 00B5 B5 00B6 B6 00B7 B7 00B8 B8 00B9 B9 00BB BB 00BC BC 00BD BD 00BE BE 00BF BF 00D7 AA 00F7 BA 0192 83 02C6 88 02DC 98 05B0 C0 05B1 C1 05B2 C2 05B3 C3 05B4 C4 05B5 C5 05B6 C6 05B7 C7 05B8 C8 05B9 C9 05BB CB 05BC CC 05BD CD 05BE CE 05BF CF 05C0 D0 05C1 D1 05C2 D2 05C3 D3 05D0 E0 05D1 E1 05D2 E2 05D3 E3 05D4 E4 05D5 E5 05D6 E6 05D7 E7 05D8 E8 05D9 E9 05DA EA 05DB EB 05DC EC 05DD ED 05DE EE 05DF EF 05E0 F0 05E1 F1 05E2 F2 05E3 F3 05E4 F4 05E5 F5 05E6 F6 05E7 F7 05E8 F8 05E9 F9 05EA FA 05F0 D4 05F1 D5 05F2 D6 05F3 D7 05F4 D8 200E FD 200F FE 2013 96 2014 97 2018 91 2019 92 201A 82 201C 93 201D 94 201E 84 2020 86 2021 87 2022 95 2026 85 2030 89 2039 8B 203A 9B 20AA A4 20AC 80 2122 99} +} -result {} + +# cp1255 - invalid byte sequences +lappend encInvalidBytes {*}{ + cp1255 81 tcl8 \U00000081 -1 {} {} + cp1255 81 replace \uFFFD -1 {} {} + cp1255 81 strict {} 0 {} {} + cp1255 8A tcl8 \U0000008A -1 {} {} + cp1255 8A replace \uFFFD -1 {} {} + cp1255 8A strict {} 0 {} {} + cp1255 8C tcl8 \U0000008C -1 {} {} + cp1255 8C replace \uFFFD -1 {} {} + cp1255 8C strict {} 0 {} {} + cp1255 8D tcl8 \U0000008D -1 {} {} + cp1255 8D replace \uFFFD -1 {} {} + cp1255 8D strict {} 0 {} {} + cp1255 8E tcl8 \U0000008E -1 {} {} + cp1255 8E replace \uFFFD -1 {} {} + cp1255 8E strict {} 0 {} {} + cp1255 8F tcl8 \U0000008F -1 {} {} + cp1255 8F replace \uFFFD -1 {} {} + cp1255 8F strict {} 0 {} {} + cp1255 90 tcl8 \U00000090 -1 {} {} + cp1255 90 replace \uFFFD -1 {} {} + cp1255 90 strict {} 0 {} {} + cp1255 9A tcl8 \U0000009A -1 {} {} + cp1255 9A replace \uFFFD -1 {} {} + cp1255 9A strict {} 0 {} {} + cp1255 9C tcl8 \U0000009C -1 {} {} + cp1255 9C replace \uFFFD -1 {} {} + cp1255 9C strict {} 0 {} {} + cp1255 9D tcl8 \U0000009D -1 {} {} + cp1255 9D replace \uFFFD -1 {} {} + cp1255 9D strict {} 0 {} {} + cp1255 9E tcl8 \U0000009E -1 {} {} + cp1255 9E replace \uFFFD -1 {} {} + cp1255 9E strict {} 0 {} {} + cp1255 9F tcl8 \U0000009F -1 {} {} + cp1255 9F replace \uFFFD -1 {} {} + cp1255 9F strict {} 0 {} {} + cp1255 CA tcl8 \U000000CA -1 {} {} + cp1255 CA replace \uFFFD -1 {} {} + cp1255 CA strict {} 0 {} {} + cp1255 D9 tcl8 \U000000D9 -1 {} {} + cp1255 D9 replace \uFFFD -1 {} {} + cp1255 D9 strict {} 0 {} {} + cp1255 DA tcl8 \U000000DA -1 {} {} + cp1255 DA replace \uFFFD -1 {} {} + cp1255 DA strict {} 0 {} {} + cp1255 DB tcl8 \U000000DB -1 {} {} + cp1255 DB replace \uFFFD -1 {} {} + cp1255 DB strict {} 0 {} {} + cp1255 DC tcl8 \U000000DC -1 {} {} + cp1255 DC replace \uFFFD -1 {} {} + cp1255 DC strict {} 0 {} {} + cp1255 DD tcl8 \U000000DD -1 {} {} + cp1255 DD replace \uFFFD -1 {} {} + cp1255 DD strict {} 0 {} {} + cp1255 DE tcl8 \U000000DE -1 {} {} + cp1255 DE replace \uFFFD -1 {} {} + cp1255 DE strict {} 0 {} {} + cp1255 DF tcl8 \U000000DF -1 {} {} + cp1255 DF replace \uFFFD -1 {} {} + cp1255 DF strict {} 0 {} {} + cp1255 FB tcl8 \U000000FB -1 {} {} + cp1255 FB replace \uFFFD -1 {} {} + cp1255 FB strict {} 0 {} {} + cp1255 FC tcl8 \U000000FC -1 {} {} + cp1255 FC replace \uFFFD -1 {} {} + cp1255 FC strict {} 0 {} {} + cp1255 FF tcl8 \U000000FF -1 {} {} + cp1255 FF replace \uFFFD -1 {} {} + cp1255 FF strict {} 0 {} {} +}; # cp1255 + +# cp1255 - invalid byte sequences +lappend encUnencodableStrings {*}{ + cp1255 \U00000080 tcl8 1A -1 {} {} + cp1255 \U00000080 replace 1A -1 {} {} + cp1255 \U00000080 strict {} 0 {} {} + cp1255 \U00000400 tcl8 1A -1 {} {} + cp1255 \U00000400 replace 1A -1 {} {} + cp1255 \U00000400 strict {} 0 {} {} + cp1255 \U0000D800 tcl8 1A -1 {} {} + cp1255 \U0000D800 replace 1A -1 {} {} + cp1255 \U0000D800 strict {} 0 {} {} + cp1255 \U0000DC00 tcl8 1A -1 {} {} + cp1255 \U0000DC00 replace 1A -1 {} {} + cp1255 \U0000DC00 strict {} 0 {} {} + cp1255 \U00010000 tcl8 1A -1 {} {} + cp1255 \U00010000 replace 1A -1 {} {} + cp1255 \U00010000 strict {} 0 {} {} + cp1255 \U0010FFFF tcl8 1A -1 {} {} + cp1255 \U0010FFFF replace 1A -1 {} {} + cp1255 \U0010FFFF strict {} 0 {} {} +}; # cp1255 + +# +# cp1256 (generated from glibc-CP1256-2.1.2) + +test encoding-convertfrom-ucmCompare-cp1256 {Compare against ICU UCM} -body { + ucmConvertfromMismatches cp1256 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0024 24 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007E 7E 007F 7F 00A0 A0 00A2 A2 00A3 A3 00A4 A4 00A5 A5 00A6 A6 00A7 A7 00A8 A8 00A9 A9 00AB AB 00AC AC 00AD AD 00AE AE 00AF AF 00B0 B0 00B1 B1 00B2 B2 00B3 B3 00B4 B4 00B5 B5 00B6 B6 00B7 B7 00B8 B8 00B9 B9 00BB BB 00BC BC 00BD BD 00BE BE 00D7 D7 00E0 E0 00E2 E2 00E7 E7 00E8 E8 00E9 E9 00EA EA 00EB EB 00EE EE 00EF EF 00F4 F4 00F7 F7 00F9 F9 00FB FB 00FC FC 0152 8C 0153 9C 0192 83 02C6 88 060C A1 061B BA 061F BF 0621 C1 0622 C2 0623 C3 0624 C4 0625 C5 0626 C6 0627 C7 0628 C8 0629 C9 062A CA 062B CB 062C CC 062D CD 062E CE 062F CF 0630 D0 0631 D1 0632 D2 0633 D3 0634 D4 0635 D5 0636 D6 0637 D8 0638 D9 0639 DA 063A DB 0640 DC 0641 DD 0642 DE 0643 DF 0644 E1 0645 E3 0646 E4 0647 E5 0648 E6 0649 EC 064A ED 064B F0 064C F1 064D F2 064E F3 064F F5 0650 F6 0651 F8 0652 FA 0679 8A 067E 81 0686 8D 0688 8F 0691 9A 0698 8E 06A9 98 06AF 90 06BA 9F 06BE AA 06C1 C0 06D2 FF 200C 9D 200D 9E 200E FD 200F FE 2013 96 2014 97 2018 91 2019 92 201A 82 201C 93 201D 94 201E 84 2020 86 2021 87 2022 95 2026 85 2030 89 2039 8B 203A 9B 20AC 80 2122 99} +} -result {} + +test encoding-convertto-ucmCompare-cp1256 {Compare against ICU UCM} -body { + ucmConverttoMismatches cp1256 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0024 24 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007E 7E 007F 7F 00A0 A0 00A2 A2 00A3 A3 00A4 A4 00A5 A5 00A6 A6 00A7 A7 00A8 A8 00A9 A9 00AB AB 00AC AC 00AD AD 00AE AE 00AF AF 00B0 B0 00B1 B1 00B2 B2 00B3 B3 00B4 B4 00B5 B5 00B6 B6 00B7 B7 00B8 B8 00B9 B9 00BB BB 00BC BC 00BD BD 00BE BE 00D7 D7 00E0 E0 00E2 E2 00E7 E7 00E8 E8 00E9 E9 00EA EA 00EB EB 00EE EE 00EF EF 00F4 F4 00F7 F7 00F9 F9 00FB FB 00FC FC 0152 8C 0153 9C 0192 83 02C6 88 060C A1 061B BA 061F BF 0621 C1 0622 C2 0623 C3 0624 C4 0625 C5 0626 C6 0627 C7 0628 C8 0629 C9 062A CA 062B CB 062C CC 062D CD 062E CE 062F CF 0630 D0 0631 D1 0632 D2 0633 D3 0634 D4 0635 D5 0636 D6 0637 D8 0638 D9 0639 DA 063A DB 0640 DC 0641 DD 0642 DE 0643 DF 0644 E1 0645 E3 0646 E4 0647 E5 0648 E6 0649 EC 064A ED 064B F0 064C F1 064D F2 064E F3 064F F5 0650 F6 0651 F8 0652 FA 0679 8A 067E 81 0686 8D 0688 8F 0691 9A 0698 8E 06A9 98 06AF 90 06BA 9F 06BE AA 06C1 C0 06D2 FF 200C 9D 200D 9E 200E FD 200F FE 2013 96 2014 97 2018 91 2019 92 201A 82 201C 93 201D 94 201E 84 2020 86 2021 87 2022 95 2026 85 2030 89 2039 8B 203A 9B 20AC 80 2122 99} +} -result {} + +# cp1256 - invalid byte sequences +lappend encInvalidBytes {*}{ +}; # cp1256 + +# cp1256 - invalid byte sequences +lappend encUnencodableStrings {*}{ + cp1256 \U00000080 tcl8 1A -1 {} {} + cp1256 \U00000080 replace 1A -1 {} {} + cp1256 \U00000080 strict {} 0 {} {} + cp1256 \U00000400 tcl8 1A -1 {} {} + cp1256 \U00000400 replace 1A -1 {} {} + cp1256 \U00000400 strict {} 0 {} {} + cp1256 \U0000D800 tcl8 1A -1 {} {} + cp1256 \U0000D800 replace 1A -1 {} {} + cp1256 \U0000D800 strict {} 0 {} {} + cp1256 \U0000DC00 tcl8 1A -1 {} {} + cp1256 \U0000DC00 replace 1A -1 {} {} + cp1256 \U0000DC00 strict {} 0 {} {} + cp1256 \U00010000 tcl8 1A -1 {} {} + cp1256 \U00010000 replace 1A -1 {} {} + cp1256 \U00010000 strict {} 0 {} {} + cp1256 \U0010FFFF tcl8 1A -1 {} {} + cp1256 \U0010FFFF replace 1A -1 {} {} + cp1256 \U0010FFFF strict {} 0 {} {} +}; # cp1256 + +# +# cp1257 (generated from glibc-CP1257-2.1.2) + +test encoding-convertfrom-ucmCompare-cp1257 {Compare against ICU UCM} -body { + ucmConvertfromMismatches cp1257 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0024 24 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007E 7E 007F 7F 00A0 A0 00A2 A2 00A3 A3 00A4 A4 00A6 A6 00A7 A7 00A8 8D 00A9 A9 00AB AB 00AC AC 00AD AD 00AE AE 00AF 9D 00B0 B0 00B1 B1 00B2 B2 00B3 B3 00B4 B4 00B5 B5 00B6 B6 00B7 B7 00B8 8F 00B9 B9 00BB BB 00BC BC 00BD BD 00BE BE 00C4 C4 00C5 C5 00C6 AF 00C9 C9 00D3 D3 00D5 D5 00D6 D6 00D7 D7 00D8 A8 00DC DC 00DF DF 00E4 E4 00E5 E5 00E6 BF 00E9 E9 00F3 F3 00F5 F5 00F6 F6 00F7 F7 00F8 B8 00FC FC 0100 C2 0101 E2 0104 C0 0105 E0 0106 C3 0107 E3 010C C8 010D E8 0112 C7 0113 E7 0116 CB 0117 EB 0118 C6 0119 E6 0122 CC 0123 EC 012A CE 012B EE 012E C1 012F E1 0136 CD 0137 ED 013B CF 013C EF 0141 D9 0142 F9 0143 D1 0144 F1 0145 D2 0146 F2 014C D4 014D F4 0156 AA 0157 BA 015A DA 015B FA 0160 D0 0161 F0 016A DB 016B FB 0172 D8 0173 F8 0179 CA 017A EA 017B DD 017C FD 017D DE 017E FE 02C7 8E 02D9 FF 02DB 9E 2013 96 2014 97 2018 91 2019 92 201A 82 201C 93 201D 94 201E 84 2020 86 2021 87 2022 95 2026 85 2030 89 2039 8B 203A 9B 20AC 80 2122 99} +} -result {} + +test encoding-convertto-ucmCompare-cp1257 {Compare against ICU UCM} -body { + ucmConverttoMismatches cp1257 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0024 24 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007E 7E 007F 7F 00A0 A0 00A2 A2 00A3 A3 00A4 A4 00A6 A6 00A7 A7 00A8 8D 00A9 A9 00AB AB 00AC AC 00AD AD 00AE AE 00AF 9D 00B0 B0 00B1 B1 00B2 B2 00B3 B3 00B4 B4 00B5 B5 00B6 B6 00B7 B7 00B8 8F 00B9 B9 00BB BB 00BC BC 00BD BD 00BE BE 00C4 C4 00C5 C5 00C6 AF 00C9 C9 00D3 D3 00D5 D5 00D6 D6 00D7 D7 00D8 A8 00DC DC 00DF DF 00E4 E4 00E5 E5 00E6 BF 00E9 E9 00F3 F3 00F5 F5 00F6 F6 00F7 F7 00F8 B8 00FC FC 0100 C2 0101 E2 0104 C0 0105 E0 0106 C3 0107 E3 010C C8 010D E8 0112 C7 0113 E7 0116 CB 0117 EB 0118 C6 0119 E6 0122 CC 0123 EC 012A CE 012B EE 012E C1 012F E1 0136 CD 0137 ED 013B CF 013C EF 0141 D9 0142 F9 0143 D1 0144 F1 0145 D2 0146 F2 014C D4 014D F4 0156 AA 0157 BA 015A DA 015B FA 0160 D0 0161 F0 016A DB 016B FB 0172 D8 0173 F8 0179 CA 017A EA 017B DD 017C FD 017D DE 017E FE 02C7 8E 02D9 FF 02DB 9E 2013 96 2014 97 2018 91 2019 92 201A 82 201C 93 201D 94 201E 84 2020 86 2021 87 2022 95 2026 85 2030 89 2039 8B 203A 9B 20AC 80 2122 99} +} -result {} + +# cp1257 - invalid byte sequences +lappend encInvalidBytes {*}{ + cp1257 81 tcl8 \U00000081 -1 {} {} + cp1257 81 replace \uFFFD -1 {} {} + cp1257 81 strict {} 0 {} {} + cp1257 83 tcl8 \U00000083 -1 {} {} + cp1257 83 replace \uFFFD -1 {} {} + cp1257 83 strict {} 0 {} {} + cp1257 88 tcl8 \U00000088 -1 {} {} + cp1257 88 replace \uFFFD -1 {} {} + cp1257 88 strict {} 0 {} {} + cp1257 8A tcl8 \U0000008A -1 {} {} + cp1257 8A replace \uFFFD -1 {} {} + cp1257 8A strict {} 0 {} {} + cp1257 8C tcl8 \U0000008C -1 {} {} + cp1257 8C replace \uFFFD -1 {} {} + cp1257 8C strict {} 0 {} {} + cp1257 90 tcl8 \U00000090 -1 {} {} + cp1257 90 replace \uFFFD -1 {} {} + cp1257 90 strict {} 0 {} {} + cp1257 98 tcl8 \U00000098 -1 {} {} + cp1257 98 replace \uFFFD -1 {} {} + cp1257 98 strict {} 0 {} {} + cp1257 9A tcl8 \U0000009A -1 {} {} + cp1257 9A replace \uFFFD -1 {} {} + cp1257 9A strict {} 0 {} {} + cp1257 9C tcl8 \U0000009C -1 {} {} + cp1257 9C replace \uFFFD -1 {} {} + cp1257 9C strict {} 0 {} {} + cp1257 9F tcl8 \U0000009F -1 {} {} + cp1257 9F replace \uFFFD -1 {} {} + cp1257 9F strict {} 0 {} {} + cp1257 A1 tcl8 \U000000A1 -1 {} {} + cp1257 A1 replace \uFFFD -1 {} {} + cp1257 A1 strict {} 0 {} {} + cp1257 A5 tcl8 \U000000A5 -1 {} {} + cp1257 A5 replace \uFFFD -1 {} {} + cp1257 A5 strict {} 0 {} {} +}; # cp1257 + +# cp1257 - invalid byte sequences +lappend encUnencodableStrings {*}{ + cp1257 \U00000080 tcl8 1A -1 {} {} + cp1257 \U00000080 replace 1A -1 {} {} + cp1257 \U00000080 strict {} 0 {} {} + cp1257 \U00000400 tcl8 1A -1 {} {} + cp1257 \U00000400 replace 1A -1 {} {} + cp1257 \U00000400 strict {} 0 {} {} + cp1257 \U0000D800 tcl8 1A -1 {} {} + cp1257 \U0000D800 replace 1A -1 {} {} + cp1257 \U0000D800 strict {} 0 {} {} + cp1257 \U0000DC00 tcl8 1A -1 {} {} + cp1257 \U0000DC00 replace 1A -1 {} {} + cp1257 \U0000DC00 strict {} 0 {} {} + cp1257 \U00010000 tcl8 1A -1 {} {} + cp1257 \U00010000 replace 1A -1 {} {} + cp1257 \U00010000 strict {} 0 {} {} + cp1257 \U0010FFFF tcl8 1A -1 {} {} + cp1257 \U0010FFFF replace 1A -1 {} {} + cp1257 \U0010FFFF strict {} 0 {} {} +}; # cp1257 + +# +# cp1258 (generated from glibc-CP1258-2.1.2) + +test encoding-convertfrom-ucmCompare-cp1258 {Compare against ICU UCM} -body { + ucmConvertfromMismatches cp1258 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0024 24 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007E 7E 007F 7F 00A0 A0 00A1 A1 00A2 A2 00A3 A3 00A4 A4 00A5 A5 00A6 A6 00A7 A7 00A8 A8 00A9 A9 00AA AA 00AB AB 00AC AC 00AD AD 00AE AE 00AF AF 00B0 B0 00B1 B1 00B2 B2 00B3 B3 00B4 B4 00B5 B5 00B6 B6 00B7 B7 00B8 B8 00B9 B9 00BA BA 00BB BB 00BC BC 00BD BD 00BE BE 00BF BF 00C0 C0 00C1 C1 00C2 C2 00C4 C4 00C5 C5 00C6 C6 00C7 C7 00C8 C8 00C9 C9 00CA CA 00CB CB 00CD CD 00CE CE 00CF CF 00D1 D1 00D3 D3 00D4 D4 00D6 D6 00D7 D7 00D8 D8 00D9 D9 00DA DA 00DB DB 00DC DC 00DF DF 00E0 E0 00E1 E1 00E2 E2 00E4 E4 00E5 E5 00E6 E6 00E7 E7 00E8 E8 00E9 E9 00EA EA 00EB EB 00ED ED 00EE EE 00EF EF 00F1 F1 00F3 F3 00F4 F4 00F6 F6 00F7 F7 00F8 F8 00F9 F9 00FA FA 00FB FB 00FC FC 00FF FF 0102 C3 0103 E3 0110 D0 0111 F0 0152 8C 0153 9C 0178 9F 0192 83 01A0 D5 01A1 F5 01AF DD 01B0 FD 02C6 88 02DC 98 0300 CC 0303 DE 0309 D2 0323 F2 2013 96 2014 97 2018 91 2019 92 201A 82 201C 93 201D 94 201E 84 2020 86 2021 87 2022 95 2026 85 2030 89 2039 8B 203A 9B 20AB FE 20AC 80 2122 99} +} -result {} + +test encoding-convertto-ucmCompare-cp1258 {Compare against ICU UCM} -body { + ucmConverttoMismatches cp1258 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0024 24 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007E 7E 007F 7F 00A0 A0 00A1 A1 00A2 A2 00A3 A3 00A4 A4 00A5 A5 00A6 A6 00A7 A7 00A8 A8 00A9 A9 00AA AA 00AB AB 00AC AC 00AD AD 00AE AE 00AF AF 00B0 B0 00B1 B1 00B2 B2 00B3 B3 00B4 B4 00B5 B5 00B6 B6 00B7 B7 00B8 B8 00B9 B9 00BA BA 00BB BB 00BC BC 00BD BD 00BE BE 00BF BF 00C0 C0 00C1 C1 00C2 C2 00C4 C4 00C5 C5 00C6 C6 00C7 C7 00C8 C8 00C9 C9 00CA CA 00CB CB 00CD CD 00CE CE 00CF CF 00D1 D1 00D3 D3 00D4 D4 00D6 D6 00D7 D7 00D8 D8 00D9 D9 00DA DA 00DB DB 00DC DC 00DF DF 00E0 E0 00E1 E1 00E2 E2 00E4 E4 00E5 E5 00E6 E6 00E7 E7 00E8 E8 00E9 E9 00EA EA 00EB EB 00ED ED 00EE EE 00EF EF 00F1 F1 00F3 F3 00F4 F4 00F6 F6 00F7 F7 00F8 F8 00F9 F9 00FA FA 00FB FB 00FC FC 00FF FF 0102 C3 0103 E3 0110 D0 0111 F0 0152 8C 0153 9C 0178 9F 0192 83 01A0 D5 01A1 F5 01AF DD 01B0 FD 02C6 88 02DC 98 0300 CC 0303 DE 0309 D2 0323 F2 2013 96 2014 97 2018 91 2019 92 201A 82 201C 93 201D 94 201E 84 2020 86 2021 87 2022 95 2026 85 2030 89 2039 8B 203A 9B 20AB FE 20AC 80 2122 99} +} -result {} + +# cp1258 - invalid byte sequences +lappend encInvalidBytes {*}{ + cp1258 81 tcl8 \U00000081 -1 {} {} + cp1258 81 replace \uFFFD -1 {} {} + cp1258 81 strict {} 0 {} {} + cp1258 8A tcl8 \U0000008A -1 {} {} + cp1258 8A replace \uFFFD -1 {} {} + cp1258 8A strict {} 0 {} {} + cp1258 8D tcl8 \U0000008D -1 {} {} + cp1258 8D replace \uFFFD -1 {} {} + cp1258 8D strict {} 0 {} {} + cp1258 8E tcl8 \U0000008E -1 {} {} + cp1258 8E replace \uFFFD -1 {} {} + cp1258 8E strict {} 0 {} {} + cp1258 8F tcl8 \U0000008F -1 {} {} + cp1258 8F replace \uFFFD -1 {} {} + cp1258 8F strict {} 0 {} {} + cp1258 90 tcl8 \U00000090 -1 {} {} + cp1258 90 replace \uFFFD -1 {} {} + cp1258 90 strict {} 0 {} {} + cp1258 9A tcl8 \U0000009A -1 {} {} + cp1258 9A replace \uFFFD -1 {} {} + cp1258 9A strict {} 0 {} {} + cp1258 9D tcl8 \U0000009D -1 {} {} + cp1258 9D replace \uFFFD -1 {} {} + cp1258 9D strict {} 0 {} {} + cp1258 9E tcl8 \U0000009E -1 {} {} + cp1258 9E replace \uFFFD -1 {} {} + cp1258 9E strict {} 0 {} {} + cp1258 EC tcl8 \U000000EC -1 {} {} + cp1258 EC replace \uFFFD -1 {} {} + cp1258 EC strict {} 0 {} {} +}; # cp1258 + +# cp1258 - invalid byte sequences +lappend encUnencodableStrings {*}{ + cp1258 \U00000080 tcl8 1A -1 {} {} + cp1258 \U00000080 replace 1A -1 {} {} + cp1258 \U00000080 strict {} 0 {} {} + cp1258 \U00000400 tcl8 1A -1 {} {} + cp1258 \U00000400 replace 1A -1 {} {} + cp1258 \U00000400 strict {} 0 {} {} + cp1258 \U0000D800 tcl8 1A -1 {} {} + cp1258 \U0000D800 replace 1A -1 {} {} + cp1258 \U0000D800 strict {} 0 {} {} + cp1258 \U0000DC00 tcl8 1A -1 {} {} + cp1258 \U0000DC00 replace 1A -1 {} {} + cp1258 \U0000DC00 strict {} 0 {} {} + cp1258 \U00010000 tcl8 1A -1 {} {} + cp1258 \U00010000 replace 1A -1 {} {} + cp1258 \U00010000 strict {} 0 {} {} + cp1258 \U0010FFFF tcl8 1A -1 {} {} + cp1258 \U0010FFFF replace 1A -1 {} {} + cp1258 \U0010FFFF strict {} 0 {} {} +}; # cp1258 + +# +# gb1988 (generated from glibc-GB_1988_80-2.3.3) + +test encoding-convertfrom-ucmCompare-gb1988 {Compare against ICU UCM} -body { + ucmConvertfromMismatches gb1988 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007F 7F 00A5 24 203E 7E} +} -result {} + +test encoding-convertto-ucmCompare-gb1988 {Compare against ICU UCM} -body { + ucmConverttoMismatches gb1988 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007F 7F 00A5 24 203E 7E} +} -result {} + +# gb1988 - invalid byte sequences +lappend encInvalidBytes {*}{ + gb1988 80 tcl8 \U00000080 -1 {} {} + gb1988 80 replace \uFFFD -1 {} {} + gb1988 80 strict {} 0 {} {} + gb1988 81 tcl8 \U00000081 -1 {} {} + gb1988 81 replace \uFFFD -1 {} {} + gb1988 81 strict {} 0 {} {} + gb1988 82 tcl8 \U00000082 -1 {} {} + gb1988 82 replace \uFFFD -1 {} {} + gb1988 82 strict {} 0 {} {} + gb1988 83 tcl8 \U00000083 -1 {} {} + gb1988 83 replace \uFFFD -1 {} {} + gb1988 83 strict {} 0 {} {} + gb1988 84 tcl8 \U00000084 -1 {} {} + gb1988 84 replace \uFFFD -1 {} {} + gb1988 84 strict {} 0 {} {} + gb1988 85 tcl8 \U00000085 -1 {} {} + gb1988 85 replace \uFFFD -1 {} {} + gb1988 85 strict {} 0 {} {} + gb1988 86 tcl8 \U00000086 -1 {} {} + gb1988 86 replace \uFFFD -1 {} {} + gb1988 86 strict {} 0 {} {} + gb1988 87 tcl8 \U00000087 -1 {} {} + gb1988 87 replace \uFFFD -1 {} {} + gb1988 87 strict {} 0 {} {} + gb1988 88 tcl8 \U00000088 -1 {} {} + gb1988 88 replace \uFFFD -1 {} {} + gb1988 88 strict {} 0 {} {} + gb1988 89 tcl8 \U00000089 -1 {} {} + gb1988 89 replace \uFFFD -1 {} {} + gb1988 89 strict {} 0 {} {} + gb1988 8A tcl8 \U0000008A -1 {} {} + gb1988 8A replace \uFFFD -1 {} {} + gb1988 8A strict {} 0 {} {} + gb1988 8B tcl8 \U0000008B -1 {} {} + gb1988 8B replace \uFFFD -1 {} {} + gb1988 8B strict {} 0 {} {} + gb1988 8C tcl8 \U0000008C -1 {} {} + gb1988 8C replace \uFFFD -1 {} {} + gb1988 8C strict {} 0 {} {} + gb1988 8D tcl8 \U0000008D -1 {} {} + gb1988 8D replace \uFFFD -1 {} {} + gb1988 8D strict {} 0 {} {} + gb1988 8E tcl8 \U0000008E -1 {} {} + gb1988 8E replace \uFFFD -1 {} {} + gb1988 8E strict {} 0 {} {} + gb1988 8F tcl8 \U0000008F -1 {} {} + gb1988 8F replace \uFFFD -1 {} {} + gb1988 8F strict {} 0 {} {} + gb1988 90 tcl8 \U00000090 -1 {} {} + gb1988 90 replace \uFFFD -1 {} {} + gb1988 90 strict {} 0 {} {} + gb1988 91 tcl8 \U00000091 -1 {} {} + gb1988 91 replace \uFFFD -1 {} {} + gb1988 91 strict {} 0 {} {} + gb1988 92 tcl8 \U00000092 -1 {} {} + gb1988 92 replace \uFFFD -1 {} {} + gb1988 92 strict {} 0 {} {} + gb1988 93 tcl8 \U00000093 -1 {} {} + gb1988 93 replace \uFFFD -1 {} {} + gb1988 93 strict {} 0 {} {} + gb1988 94 tcl8 \U00000094 -1 {} {} + gb1988 94 replace \uFFFD -1 {} {} + gb1988 94 strict {} 0 {} {} + gb1988 95 tcl8 \U00000095 -1 {} {} + gb1988 95 replace \uFFFD -1 {} {} + gb1988 95 strict {} 0 {} {} + gb1988 96 tcl8 \U00000096 -1 {} {} + gb1988 96 replace \uFFFD -1 {} {} + gb1988 96 strict {} 0 {} {} + gb1988 97 tcl8 \U00000097 -1 {} {} + gb1988 97 replace \uFFFD -1 {} {} + gb1988 97 strict {} 0 {} {} + gb1988 98 tcl8 \U00000098 -1 {} {} + gb1988 98 replace \uFFFD -1 {} {} + gb1988 98 strict {} 0 {} {} + gb1988 99 tcl8 \U00000099 -1 {} {} + gb1988 99 replace \uFFFD -1 {} {} + gb1988 99 strict {} 0 {} {} + gb1988 9A tcl8 \U0000009A -1 {} {} + gb1988 9A replace \uFFFD -1 {} {} + gb1988 9A strict {} 0 {} {} + gb1988 9B tcl8 \U0000009B -1 {} {} + gb1988 9B replace \uFFFD -1 {} {} + gb1988 9B strict {} 0 {} {} + gb1988 9C tcl8 \U0000009C -1 {} {} + gb1988 9C replace \uFFFD -1 {} {} + gb1988 9C strict {} 0 {} {} + gb1988 9D tcl8 \U0000009D -1 {} {} + gb1988 9D replace \uFFFD -1 {} {} + gb1988 9D strict {} 0 {} {} + gb1988 9E tcl8 \U0000009E -1 {} {} + gb1988 9E replace \uFFFD -1 {} {} + gb1988 9E strict {} 0 {} {} + gb1988 9F tcl8 \U0000009F -1 {} {} + gb1988 9F replace \uFFFD -1 {} {} + gb1988 9F strict {} 0 {} {} + gb1988 A0 tcl8 \U000000A0 -1 {} {} + gb1988 A0 replace \uFFFD -1 {} {} + gb1988 A0 strict {} 0 {} {} + gb1988 A1 tcl8 \U000000A1 -1 {} {} + gb1988 A1 replace \uFFFD -1 {} {} + gb1988 A1 strict {} 0 {} {} + gb1988 A2 tcl8 \U000000A2 -1 {} {} + gb1988 A2 replace \uFFFD -1 {} {} + gb1988 A2 strict {} 0 {} {} + gb1988 A3 tcl8 \U000000A3 -1 {} {} + gb1988 A3 replace \uFFFD -1 {} {} + gb1988 A3 strict {} 0 {} {} + gb1988 A4 tcl8 \U000000A4 -1 {} {} + gb1988 A4 replace \uFFFD -1 {} {} + gb1988 A4 strict {} 0 {} {} + gb1988 A5 tcl8 \U000000A5 -1 {} {} + gb1988 A5 replace \uFFFD -1 {} {} + gb1988 A5 strict {} 0 {} {} + gb1988 A6 tcl8 \U000000A6 -1 {} {} + gb1988 A6 replace \uFFFD -1 {} {} + gb1988 A6 strict {} 0 {} {} + gb1988 A7 tcl8 \U000000A7 -1 {} {} + gb1988 A7 replace \uFFFD -1 {} {} + gb1988 A7 strict {} 0 {} {} + gb1988 A8 tcl8 \U000000A8 -1 {} {} + gb1988 A8 replace \uFFFD -1 {} {} + gb1988 A8 strict {} 0 {} {} + gb1988 A9 tcl8 \U000000A9 -1 {} {} + gb1988 A9 replace \uFFFD -1 {} {} + gb1988 A9 strict {} 0 {} {} + gb1988 AA tcl8 \U000000AA -1 {} {} + gb1988 AA replace \uFFFD -1 {} {} + gb1988 AA strict {} 0 {} {} + gb1988 AB tcl8 \U000000AB -1 {} {} + gb1988 AB replace \uFFFD -1 {} {} + gb1988 AB strict {} 0 {} {} + gb1988 AC tcl8 \U000000AC -1 {} {} + gb1988 AC replace \uFFFD -1 {} {} + gb1988 AC strict {} 0 {} {} + gb1988 AD tcl8 \U000000AD -1 {} {} + gb1988 AD replace \uFFFD -1 {} {} + gb1988 AD strict {} 0 {} {} + gb1988 AE tcl8 \U000000AE -1 {} {} + gb1988 AE replace \uFFFD -1 {} {} + gb1988 AE strict {} 0 {} {} + gb1988 AF tcl8 \U000000AF -1 {} {} + gb1988 AF replace \uFFFD -1 {} {} + gb1988 AF strict {} 0 {} {} + gb1988 B0 tcl8 \U000000B0 -1 {} {} + gb1988 B0 replace \uFFFD -1 {} {} + gb1988 B0 strict {} 0 {} {} + gb1988 B1 tcl8 \U000000B1 -1 {} {} + gb1988 B1 replace \uFFFD -1 {} {} + gb1988 B1 strict {} 0 {} {} + gb1988 B2 tcl8 \U000000B2 -1 {} {} + gb1988 B2 replace \uFFFD -1 {} {} + gb1988 B2 strict {} 0 {} {} + gb1988 B3 tcl8 \U000000B3 -1 {} {} + gb1988 B3 replace \uFFFD -1 {} {} + gb1988 B3 strict {} 0 {} {} + gb1988 B4 tcl8 \U000000B4 -1 {} {} + gb1988 B4 replace \uFFFD -1 {} {} + gb1988 B4 strict {} 0 {} {} + gb1988 B5 tcl8 \U000000B5 -1 {} {} + gb1988 B5 replace \uFFFD -1 {} {} + gb1988 B5 strict {} 0 {} {} + gb1988 B6 tcl8 \U000000B6 -1 {} {} + gb1988 B6 replace \uFFFD -1 {} {} + gb1988 B6 strict {} 0 {} {} + gb1988 B7 tcl8 \U000000B7 -1 {} {} + gb1988 B7 replace \uFFFD -1 {} {} + gb1988 B7 strict {} 0 {} {} + gb1988 B8 tcl8 \U000000B8 -1 {} {} + gb1988 B8 replace \uFFFD -1 {} {} + gb1988 B8 strict {} 0 {} {} + gb1988 B9 tcl8 \U000000B9 -1 {} {} + gb1988 B9 replace \uFFFD -1 {} {} + gb1988 B9 strict {} 0 {} {} + gb1988 BA tcl8 \U000000BA -1 {} {} + gb1988 BA replace \uFFFD -1 {} {} + gb1988 BA strict {} 0 {} {} + gb1988 BB tcl8 \U000000BB -1 {} {} + gb1988 BB replace \uFFFD -1 {} {} + gb1988 BB strict {} 0 {} {} + gb1988 BC tcl8 \U000000BC -1 {} {} + gb1988 BC replace \uFFFD -1 {} {} + gb1988 BC strict {} 0 {} {} + gb1988 BD tcl8 \U000000BD -1 {} {} + gb1988 BD replace \uFFFD -1 {} {} + gb1988 BD strict {} 0 {} {} + gb1988 BE tcl8 \U000000BE -1 {} {} + gb1988 BE replace \uFFFD -1 {} {} + gb1988 BE strict {} 0 {} {} + gb1988 BF tcl8 \U000000BF -1 {} {} + gb1988 BF replace \uFFFD -1 {} {} + gb1988 BF strict {} 0 {} {} + gb1988 C0 tcl8 \U000000C0 -1 {} {} + gb1988 C0 replace \uFFFD -1 {} {} + gb1988 C0 strict {} 0 {} {} + gb1988 C1 tcl8 \U000000C1 -1 {} {} + gb1988 C1 replace \uFFFD -1 {} {} + gb1988 C1 strict {} 0 {} {} + gb1988 C2 tcl8 \U000000C2 -1 {} {} + gb1988 C2 replace \uFFFD -1 {} {} + gb1988 C2 strict {} 0 {} {} + gb1988 C3 tcl8 \U000000C3 -1 {} {} + gb1988 C3 replace \uFFFD -1 {} {} + gb1988 C3 strict {} 0 {} {} + gb1988 C4 tcl8 \U000000C4 -1 {} {} + gb1988 C4 replace \uFFFD -1 {} {} + gb1988 C4 strict {} 0 {} {} + gb1988 C5 tcl8 \U000000C5 -1 {} {} + gb1988 C5 replace \uFFFD -1 {} {} + gb1988 C5 strict {} 0 {} {} + gb1988 C6 tcl8 \U000000C6 -1 {} {} + gb1988 C6 replace \uFFFD -1 {} {} + gb1988 C6 strict {} 0 {} {} + gb1988 C7 tcl8 \U000000C7 -1 {} {} + gb1988 C7 replace \uFFFD -1 {} {} + gb1988 C7 strict {} 0 {} {} + gb1988 C8 tcl8 \U000000C8 -1 {} {} + gb1988 C8 replace \uFFFD -1 {} {} + gb1988 C8 strict {} 0 {} {} + gb1988 C9 tcl8 \U000000C9 -1 {} {} + gb1988 C9 replace \uFFFD -1 {} {} + gb1988 C9 strict {} 0 {} {} + gb1988 CA tcl8 \U000000CA -1 {} {} + gb1988 CA replace \uFFFD -1 {} {} + gb1988 CA strict {} 0 {} {} + gb1988 CB tcl8 \U000000CB -1 {} {} + gb1988 CB replace \uFFFD -1 {} {} + gb1988 CB strict {} 0 {} {} + gb1988 CC tcl8 \U000000CC -1 {} {} + gb1988 CC replace \uFFFD -1 {} {} + gb1988 CC strict {} 0 {} {} + gb1988 CD tcl8 \U000000CD -1 {} {} + gb1988 CD replace \uFFFD -1 {} {} + gb1988 CD strict {} 0 {} {} + gb1988 CE tcl8 \U000000CE -1 {} {} + gb1988 CE replace \uFFFD -1 {} {} + gb1988 CE strict {} 0 {} {} + gb1988 CF tcl8 \U000000CF -1 {} {} + gb1988 CF replace \uFFFD -1 {} {} + gb1988 CF strict {} 0 {} {} + gb1988 D0 tcl8 \U000000D0 -1 {} {} + gb1988 D0 replace \uFFFD -1 {} {} + gb1988 D0 strict {} 0 {} {} + gb1988 D1 tcl8 \U000000D1 -1 {} {} + gb1988 D1 replace \uFFFD -1 {} {} + gb1988 D1 strict {} 0 {} {} + gb1988 D2 tcl8 \U000000D2 -1 {} {} + gb1988 D2 replace \uFFFD -1 {} {} + gb1988 D2 strict {} 0 {} {} + gb1988 D3 tcl8 \U000000D3 -1 {} {} + gb1988 D3 replace \uFFFD -1 {} {} + gb1988 D3 strict {} 0 {} {} + gb1988 D4 tcl8 \U000000D4 -1 {} {} + gb1988 D4 replace \uFFFD -1 {} {} + gb1988 D4 strict {} 0 {} {} + gb1988 D5 tcl8 \U000000D5 -1 {} {} + gb1988 D5 replace \uFFFD -1 {} {} + gb1988 D5 strict {} 0 {} {} + gb1988 D6 tcl8 \U000000D6 -1 {} {} + gb1988 D6 replace \uFFFD -1 {} {} + gb1988 D6 strict {} 0 {} {} + gb1988 D7 tcl8 \U000000D7 -1 {} {} + gb1988 D7 replace \uFFFD -1 {} {} + gb1988 D7 strict {} 0 {} {} + gb1988 D8 tcl8 \U000000D8 -1 {} {} + gb1988 D8 replace \uFFFD -1 {} {} + gb1988 D8 strict {} 0 {} {} + gb1988 D9 tcl8 \U000000D9 -1 {} {} + gb1988 D9 replace \uFFFD -1 {} {} + gb1988 D9 strict {} 0 {} {} + gb1988 DA tcl8 \U000000DA -1 {} {} + gb1988 DA replace \uFFFD -1 {} {} + gb1988 DA strict {} 0 {} {} + gb1988 DB tcl8 \U000000DB -1 {} {} + gb1988 DB replace \uFFFD -1 {} {} + gb1988 DB strict {} 0 {} {} + gb1988 DC tcl8 \U000000DC -1 {} {} + gb1988 DC replace \uFFFD -1 {} {} + gb1988 DC strict {} 0 {} {} + gb1988 DD tcl8 \U000000DD -1 {} {} + gb1988 DD replace \uFFFD -1 {} {} + gb1988 DD strict {} 0 {} {} + gb1988 DE tcl8 \U000000DE -1 {} {} + gb1988 DE replace \uFFFD -1 {} {} + gb1988 DE strict {} 0 {} {} + gb1988 DF tcl8 \U000000DF -1 {} {} + gb1988 DF replace \uFFFD -1 {} {} + gb1988 DF strict {} 0 {} {} + gb1988 E0 tcl8 \U000000E0 -1 {} {} + gb1988 E0 replace \uFFFD -1 {} {} + gb1988 E0 strict {} 0 {} {} + gb1988 E1 tcl8 \U000000E1 -1 {} {} + gb1988 E1 replace \uFFFD -1 {} {} + gb1988 E1 strict {} 0 {} {} + gb1988 E2 tcl8 \U000000E2 -1 {} {} + gb1988 E2 replace \uFFFD -1 {} {} + gb1988 E2 strict {} 0 {} {} + gb1988 E3 tcl8 \U000000E3 -1 {} {} + gb1988 E3 replace \uFFFD -1 {} {} + gb1988 E3 strict {} 0 {} {} + gb1988 E4 tcl8 \U000000E4 -1 {} {} + gb1988 E4 replace \uFFFD -1 {} {} + gb1988 E4 strict {} 0 {} {} + gb1988 E5 tcl8 \U000000E5 -1 {} {} + gb1988 E5 replace \uFFFD -1 {} {} + gb1988 E5 strict {} 0 {} {} + gb1988 E6 tcl8 \U000000E6 -1 {} {} + gb1988 E6 replace \uFFFD -1 {} {} + gb1988 E6 strict {} 0 {} {} + gb1988 E7 tcl8 \U000000E7 -1 {} {} + gb1988 E7 replace \uFFFD -1 {} {} + gb1988 E7 strict {} 0 {} {} + gb1988 E8 tcl8 \U000000E8 -1 {} {} + gb1988 E8 replace \uFFFD -1 {} {} + gb1988 E8 strict {} 0 {} {} + gb1988 E9 tcl8 \U000000E9 -1 {} {} + gb1988 E9 replace \uFFFD -1 {} {} + gb1988 E9 strict {} 0 {} {} + gb1988 EA tcl8 \U000000EA -1 {} {} + gb1988 EA replace \uFFFD -1 {} {} + gb1988 EA strict {} 0 {} {} + gb1988 EB tcl8 \U000000EB -1 {} {} + gb1988 EB replace \uFFFD -1 {} {} + gb1988 EB strict {} 0 {} {} + gb1988 EC tcl8 \U000000EC -1 {} {} + gb1988 EC replace \uFFFD -1 {} {} + gb1988 EC strict {} 0 {} {} + gb1988 ED tcl8 \U000000ED -1 {} {} + gb1988 ED replace \uFFFD -1 {} {} + gb1988 ED strict {} 0 {} {} + gb1988 EE tcl8 \U000000EE -1 {} {} + gb1988 EE replace \uFFFD -1 {} {} + gb1988 EE strict {} 0 {} {} + gb1988 EF tcl8 \U000000EF -1 {} {} + gb1988 EF replace \uFFFD -1 {} {} + gb1988 EF strict {} 0 {} {} + gb1988 F0 tcl8 \U000000F0 -1 {} {} + gb1988 F0 replace \uFFFD -1 {} {} + gb1988 F0 strict {} 0 {} {} + gb1988 F1 tcl8 \U000000F1 -1 {} {} + gb1988 F1 replace \uFFFD -1 {} {} + gb1988 F1 strict {} 0 {} {} + gb1988 F2 tcl8 \U000000F2 -1 {} {} + gb1988 F2 replace \uFFFD -1 {} {} + gb1988 F2 strict {} 0 {} {} + gb1988 F3 tcl8 \U000000F3 -1 {} {} + gb1988 F3 replace \uFFFD -1 {} {} + gb1988 F3 strict {} 0 {} {} + gb1988 F4 tcl8 \U000000F4 -1 {} {} + gb1988 F4 replace \uFFFD -1 {} {} + gb1988 F4 strict {} 0 {} {} + gb1988 F5 tcl8 \U000000F5 -1 {} {} + gb1988 F5 replace \uFFFD -1 {} {} + gb1988 F5 strict {} 0 {} {} + gb1988 F6 tcl8 \U000000F6 -1 {} {} + gb1988 F6 replace \uFFFD -1 {} {} + gb1988 F6 strict {} 0 {} {} + gb1988 F7 tcl8 \U000000F7 -1 {} {} + gb1988 F7 replace \uFFFD -1 {} {} + gb1988 F7 strict {} 0 {} {} + gb1988 F8 tcl8 \U000000F8 -1 {} {} + gb1988 F8 replace \uFFFD -1 {} {} + gb1988 F8 strict {} 0 {} {} + gb1988 F9 tcl8 \U000000F9 -1 {} {} + gb1988 F9 replace \uFFFD -1 {} {} + gb1988 F9 strict {} 0 {} {} + gb1988 FA tcl8 \U000000FA -1 {} {} + gb1988 FA replace \uFFFD -1 {} {} + gb1988 FA strict {} 0 {} {} + gb1988 FB tcl8 \U000000FB -1 {} {} + gb1988 FB replace \uFFFD -1 {} {} + gb1988 FB strict {} 0 {} {} + gb1988 FC tcl8 \U000000FC -1 {} {} + gb1988 FC replace \uFFFD -1 {} {} + gb1988 FC strict {} 0 {} {} + gb1988 FD tcl8 \U000000FD -1 {} {} + gb1988 FD replace \uFFFD -1 {} {} + gb1988 FD strict {} 0 {} {} + gb1988 FE tcl8 \U000000FE -1 {} {} + gb1988 FE replace \uFFFD -1 {} {} + gb1988 FE strict {} 0 {} {} + gb1988 FF tcl8 \U000000FF -1 {} {} + gb1988 FF replace \uFFFD -1 {} {} + gb1988 FF strict {} 0 {} {} +}; # gb1988 + +# gb1988 - invalid byte sequences +lappend encUnencodableStrings {*}{ + gb1988 \U00000024 tcl8 1A -1 {} {} + gb1988 \U00000024 replace 1A -1 {} {} + gb1988 \U00000024 strict {} 0 {} {} + gb1988 \U00000400 tcl8 1A -1 {} {} + gb1988 \U00000400 replace 1A -1 {} {} + gb1988 \U00000400 strict {} 0 {} {} + gb1988 \U0000D800 tcl8 1A -1 {} {} + gb1988 \U0000D800 replace 1A -1 {} {} + gb1988 \U0000D800 strict {} 0 {} {} + gb1988 \U0000DC00 tcl8 1A -1 {} {} + gb1988 \U0000DC00 replace 1A -1 {} {} + gb1988 \U0000DC00 strict {} 0 {} {} + gb1988 \U00010000 tcl8 1A -1 {} {} + gb1988 \U00010000 replace 1A -1 {} {} + gb1988 \U00010000 strict {} 0 {} {} + gb1988 \U0010FFFF tcl8 1A -1 {} {} + gb1988 \U0010FFFF replace 1A -1 {} {} + gb1988 \U0010FFFF strict {} 0 {} {} +}; # gb1988 + +# +# iso8859-1 (generated from glibc-ISO_8859_1-2.1.2) + +test encoding-convertfrom-ucmCompare-iso8859-1 {Compare against ICU UCM} -body { + ucmConvertfromMismatches iso8859-1 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0024 24 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007E 7E 007F 7F 0080 80 0081 81 0082 82 0083 83 0084 84 0085 85 0086 86 0087 87 0088 88 0089 89 008A 8A 008B 8B 008C 8C 008D 8D 008E 8E 008F 8F 0090 90 0091 91 0092 92 0093 93 0094 94 0095 95 0096 96 0097 97 0098 98 0099 99 009A 9A 009B 9B 009C 9C 009D 9D 009E 9E 009F 9F 00A0 A0 00A1 A1 00A2 A2 00A3 A3 00A4 A4 00A5 A5 00A6 A6 00A7 A7 00A8 A8 00A9 A9 00AA AA 00AB AB 00AC AC 00AD AD 00AE AE 00AF AF 00B0 B0 00B1 B1 00B2 B2 00B3 B3 00B4 B4 00B5 B5 00B6 B6 00B7 B7 00B8 B8 00B9 B9 00BA BA 00BB BB 00BC BC 00BD BD 00BE BE 00BF BF 00C0 C0 00C1 C1 00C2 C2 00C3 C3 00C4 C4 00C5 C5 00C6 C6 00C7 C7 00C8 C8 00C9 C9 00CA CA 00CB CB 00CC CC 00CD CD 00CE CE 00CF CF 00D0 D0 00D1 D1 00D2 D2 00D3 D3 00D4 D4 00D5 D5 00D6 D6 00D7 D7 00D8 D8 00D9 D9 00DA DA 00DB DB 00DC DC 00DD DD 00DE DE 00DF DF 00E0 E0 00E1 E1 00E2 E2 00E3 E3 00E4 E4 00E5 E5 00E6 E6 00E7 E7 00E8 E8 00E9 E9 00EA EA 00EB EB 00EC EC 00ED ED 00EE EE 00EF EF 00F0 F0 00F1 F1 00F2 F2 00F3 F3 00F4 F4 00F5 F5 00F6 F6 00F7 F7 00F8 F8 00F9 F9 00FA FA 00FB FB 00FC FC 00FD FD 00FE FE 00FF FF} +} -result {} + +test encoding-convertto-ucmCompare-iso8859-1 {Compare against ICU UCM} -body { + ucmConverttoMismatches iso8859-1 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0024 24 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007E 7E 007F 7F 0080 80 0081 81 0082 82 0083 83 0084 84 0085 85 0086 86 0087 87 0088 88 0089 89 008A 8A 008B 8B 008C 8C 008D 8D 008E 8E 008F 8F 0090 90 0091 91 0092 92 0093 93 0094 94 0095 95 0096 96 0097 97 0098 98 0099 99 009A 9A 009B 9B 009C 9C 009D 9D 009E 9E 009F 9F 00A0 A0 00A1 A1 00A2 A2 00A3 A3 00A4 A4 00A5 A5 00A6 A6 00A7 A7 00A8 A8 00A9 A9 00AA AA 00AB AB 00AC AC 00AD AD 00AE AE 00AF AF 00B0 B0 00B1 B1 00B2 B2 00B3 B3 00B4 B4 00B5 B5 00B6 B6 00B7 B7 00B8 B8 00B9 B9 00BA BA 00BB BB 00BC BC 00BD BD 00BE BE 00BF BF 00C0 C0 00C1 C1 00C2 C2 00C3 C3 00C4 C4 00C5 C5 00C6 C6 00C7 C7 00C8 C8 00C9 C9 00CA CA 00CB CB 00CC CC 00CD CD 00CE CE 00CF CF 00D0 D0 00D1 D1 00D2 D2 00D3 D3 00D4 D4 00D5 D5 00D6 D6 00D7 D7 00D8 D8 00D9 D9 00DA DA 00DB DB 00DC DC 00DD DD 00DE DE 00DF DF 00E0 E0 00E1 E1 00E2 E2 00E3 E3 00E4 E4 00E5 E5 00E6 E6 00E7 E7 00E8 E8 00E9 E9 00EA EA 00EB EB 00EC EC 00ED ED 00EE EE 00EF EF 00F0 F0 00F1 F1 00F2 F2 00F3 F3 00F4 F4 00F5 F5 00F6 F6 00F7 F7 00F8 F8 00F9 F9 00FA FA 00FB FB 00FC FC 00FD FD 00FE FE 00FF FF} +} -result {} + +# iso8859-1 - invalid byte sequences +lappend encInvalidBytes {*}{ +}; # iso8859-1 + +# iso8859-1 - invalid byte sequences +lappend encUnencodableStrings {*}{ + iso8859-1 \U00000400 tcl8 1A -1 {} {} + iso8859-1 \U00000400 replace 1A -1 {} {} + iso8859-1 \U00000400 strict {} 0 {} {} + iso8859-1 \U0000D800 tcl8 1A -1 {} {} + iso8859-1 \U0000D800 replace 1A -1 {} {} + iso8859-1 \U0000D800 strict {} 0 {} {} + iso8859-1 \U0000DC00 tcl8 1A -1 {} {} + iso8859-1 \U0000DC00 replace 1A -1 {} {} + iso8859-1 \U0000DC00 strict {} 0 {} {} + iso8859-1 \U00010000 tcl8 1A -1 {} {} + iso8859-1 \U00010000 replace 1A -1 {} {} + iso8859-1 \U00010000 strict {} 0 {} {} + iso8859-1 \U0010FFFF tcl8 1A -1 {} {} + iso8859-1 \U0010FFFF replace 1A -1 {} {} + iso8859-1 \U0010FFFF strict {} 0 {} {} +}; # iso8859-1 + +# +# iso8859-2 (generated from glibc-ISO_8859_2-2.1.2) + +test encoding-convertfrom-ucmCompare-iso8859-2 {Compare against ICU UCM} -body { + ucmConvertfromMismatches iso8859-2 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0024 24 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007E 7E 007F 7F 0080 80 0081 81 0082 82 0083 83 0084 84 0085 85 0086 86 0087 87 0088 88 0089 89 008A 8A 008B 8B 008C 8C 008D 8D 008E 8E 008F 8F 0090 90 0091 91 0092 92 0093 93 0094 94 0095 95 0096 96 0097 97 0098 98 0099 99 009A 9A 009B 9B 009C 9C 009D 9D 009E 9E 009F 9F 00A0 A0 00A4 A4 00A7 A7 00A8 A8 00AD AD 00B0 B0 00B4 B4 00B8 B8 00C1 C1 00C2 C2 00C4 C4 00C7 C7 00C9 C9 00CB CB 00CD CD 00CE CE 00D3 D3 00D4 D4 00D6 D6 00D7 D7 00DA DA 00DC DC 00DD DD 00DF DF 00E1 E1 00E2 E2 00E4 E4 00E7 E7 00E9 E9 00EB EB 00ED ED 00EE EE 00F3 F3 00F4 F4 00F6 F6 00F7 F7 00FA FA 00FC FC 00FD FD 0102 C3 0103 E3 0104 A1 0105 B1 0106 C6 0107 E6 010C C8 010D E8 010E CF 010F EF 0110 D0 0111 F0 0118 CA 0119 EA 011A CC 011B EC 0139 C5 013A E5 013D A5 013E B5 0141 A3 0142 B3 0143 D1 0144 F1 0147 D2 0148 F2 0150 D5 0151 F5 0154 C0 0155 E0 0158 D8 0159 F8 015A A6 015B B6 015E AA 015F BA 0160 A9 0161 B9 0162 DE 0163 FE 0164 AB 0165 BB 016E D9 016F F9 0170 DB 0171 FB 0179 AC 017A BC 017B AF 017C BF 017D AE 017E BE 02C7 B7 02D8 A2 02D9 FF 02DB B2 02DD BD} +} -result {} + +test encoding-convertto-ucmCompare-iso8859-2 {Compare against ICU UCM} -body { + ucmConverttoMismatches iso8859-2 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0024 24 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007E 7E 007F 7F 0080 80 0081 81 0082 82 0083 83 0084 84 0085 85 0086 86 0087 87 0088 88 0089 89 008A 8A 008B 8B 008C 8C 008D 8D 008E 8E 008F 8F 0090 90 0091 91 0092 92 0093 93 0094 94 0095 95 0096 96 0097 97 0098 98 0099 99 009A 9A 009B 9B 009C 9C 009D 9D 009E 9E 009F 9F 00A0 A0 00A4 A4 00A7 A7 00A8 A8 00AD AD 00B0 B0 00B4 B4 00B8 B8 00C1 C1 00C2 C2 00C4 C4 00C7 C7 00C9 C9 00CB CB 00CD CD 00CE CE 00D3 D3 00D4 D4 00D6 D6 00D7 D7 00DA DA 00DC DC 00DD DD 00DF DF 00E1 E1 00E2 E2 00E4 E4 00E7 E7 00E9 E9 00EB EB 00ED ED 00EE EE 00F3 F3 00F4 F4 00F6 F6 00F7 F7 00FA FA 00FC FC 00FD FD 0102 C3 0103 E3 0104 A1 0105 B1 0106 C6 0107 E6 010C C8 010D E8 010E CF 010F EF 0110 D0 0111 F0 0118 CA 0119 EA 011A CC 011B EC 0139 C5 013A E5 013D A5 013E B5 0141 A3 0142 B3 0143 D1 0144 F1 0147 D2 0148 F2 0150 D5 0151 F5 0154 C0 0155 E0 0158 D8 0159 F8 015A A6 015B B6 015E AA 015F BA 0160 A9 0161 B9 0162 DE 0163 FE 0164 AB 0165 BB 016E D9 016F F9 0170 DB 0171 FB 0179 AC 017A BC 017B AF 017C BF 017D AE 017E BE 02C7 B7 02D8 A2 02D9 FF 02DB B2 02DD BD} +} -result {} + +# iso8859-2 - invalid byte sequences +lappend encInvalidBytes {*}{ +}; # iso8859-2 + +# iso8859-2 - invalid byte sequences +lappend encUnencodableStrings {*}{ + iso8859-2 \U000000A1 tcl8 1A -1 {} {} + iso8859-2 \U000000A1 replace 1A -1 {} {} + iso8859-2 \U000000A1 strict {} 0 {} {} + iso8859-2 \U00000400 tcl8 1A -1 {} {} + iso8859-2 \U00000400 replace 1A -1 {} {} + iso8859-2 \U00000400 strict {} 0 {} {} + iso8859-2 \U0000D800 tcl8 1A -1 {} {} + iso8859-2 \U0000D800 replace 1A -1 {} {} + iso8859-2 \U0000D800 strict {} 0 {} {} + iso8859-2 \U0000DC00 tcl8 1A -1 {} {} + iso8859-2 \U0000DC00 replace 1A -1 {} {} + iso8859-2 \U0000DC00 strict {} 0 {} {} + iso8859-2 \U00010000 tcl8 1A -1 {} {} + iso8859-2 \U00010000 replace 1A -1 {} {} + iso8859-2 \U00010000 strict {} 0 {} {} + iso8859-2 \U0010FFFF tcl8 1A -1 {} {} + iso8859-2 \U0010FFFF replace 1A -1 {} {} + iso8859-2 \U0010FFFF strict {} 0 {} {} +}; # iso8859-2 + +# +# iso8859-3 (generated from glibc-ISO_8859_3-2.1.2) + +test encoding-convertfrom-ucmCompare-iso8859-3 {Compare against ICU UCM} -body { + ucmConvertfromMismatches iso8859-3 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0024 24 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007E 7E 007F 7F 0080 80 0081 81 0082 82 0083 83 0084 84 0085 85 0086 86 0087 87 0088 88 0089 89 008A 8A 008B 8B 008C 8C 008D 8D 008E 8E 008F 8F 0090 90 0091 91 0092 92 0093 93 0094 94 0095 95 0096 96 0097 97 0098 98 0099 99 009A 9A 009B 9B 009C 9C 009D 9D 009E 9E 009F 9F 00A0 A0 00A3 A3 00A4 A4 00A7 A7 00A8 A8 00AD AD 00B0 B0 00B2 B2 00B3 B3 00B4 B4 00B5 B5 00B7 B7 00B8 B8 00BD BD 00C0 C0 00C1 C1 00C2 C2 00C4 C4 00C7 C7 00C8 C8 00C9 C9 00CA CA 00CB CB 00CC CC 00CD CD 00CE CE 00CF CF 00D1 D1 00D2 D2 00D3 D3 00D4 D4 00D6 D6 00D7 D7 00D9 D9 00DA DA 00DB DB 00DC DC 00DF DF 00E0 E0 00E1 E1 00E2 E2 00E4 E4 00E7 E7 00E8 E8 00E9 E9 00EA EA 00EB EB 00EC EC 00ED ED 00EE EE 00EF EF 00F1 F1 00F2 F2 00F3 F3 00F4 F4 00F6 F6 00F7 F7 00F9 F9 00FA FA 00FB FB 00FC FC 0108 C6 0109 E6 010A C5 010B E5 011C D8 011D F8 011E AB 011F BB 0120 D5 0121 F5 0124 A6 0125 B6 0126 A1 0127 B1 0130 A9 0131 B9 0134 AC 0135 BC 015C DE 015D FE 015E AA 015F BA 016C DD 016D FD 017B AF 017C BF 02D8 A2 02D9 FF} +} -result {} + +test encoding-convertto-ucmCompare-iso8859-3 {Compare against ICU UCM} -body { + ucmConverttoMismatches iso8859-3 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0024 24 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007E 7E 007F 7F 0080 80 0081 81 0082 82 0083 83 0084 84 0085 85 0086 86 0087 87 0088 88 0089 89 008A 8A 008B 8B 008C 8C 008D 8D 008E 8E 008F 8F 0090 90 0091 91 0092 92 0093 93 0094 94 0095 95 0096 96 0097 97 0098 98 0099 99 009A 9A 009B 9B 009C 9C 009D 9D 009E 9E 009F 9F 00A0 A0 00A3 A3 00A4 A4 00A7 A7 00A8 A8 00AD AD 00B0 B0 00B2 B2 00B3 B3 00B4 B4 00B5 B5 00B7 B7 00B8 B8 00BD BD 00C0 C0 00C1 C1 00C2 C2 00C4 C4 00C7 C7 00C8 C8 00C9 C9 00CA CA 00CB CB 00CC CC 00CD CD 00CE CE 00CF CF 00D1 D1 00D2 D2 00D3 D3 00D4 D4 00D6 D6 00D7 D7 00D9 D9 00DA DA 00DB DB 00DC DC 00DF DF 00E0 E0 00E1 E1 00E2 E2 00E4 E4 00E7 E7 00E8 E8 00E9 E9 00EA EA 00EB EB 00EC EC 00ED ED 00EE EE 00EF EF 00F1 F1 00F2 F2 00F3 F3 00F4 F4 00F6 F6 00F7 F7 00F9 F9 00FA FA 00FB FB 00FC FC 0108 C6 0109 E6 010A C5 010B E5 011C D8 011D F8 011E AB 011F BB 0120 D5 0121 F5 0124 A6 0125 B6 0126 A1 0127 B1 0130 A9 0131 B9 0134 AC 0135 BC 015C DE 015D FE 015E AA 015F BA 016C DD 016D FD 017B AF 017C BF 02D8 A2 02D9 FF} +} -result {} + +# iso8859-3 - invalid byte sequences +lappend encInvalidBytes {*}{ + iso8859-3 A5 tcl8 \U000000A5 -1 {} {} + iso8859-3 A5 replace \uFFFD -1 {} {} + iso8859-3 A5 strict {} 0 {} {} + iso8859-3 AE tcl8 \U000000AE -1 {} {} + iso8859-3 AE replace \uFFFD -1 {} {} + iso8859-3 AE strict {} 0 {} {} + iso8859-3 BE tcl8 \U000000BE -1 {} {} + iso8859-3 BE replace \uFFFD -1 {} {} + iso8859-3 BE strict {} 0 {} {} + iso8859-3 C3 tcl8 \U000000C3 -1 {} {} + iso8859-3 C3 replace \uFFFD -1 {} {} + iso8859-3 C3 strict {} 0 {} {} + iso8859-3 D0 tcl8 \U000000D0 -1 {} {} + iso8859-3 D0 replace \uFFFD -1 {} {} + iso8859-3 D0 strict {} 0 {} {} + iso8859-3 E3 tcl8 \U000000E3 -1 {} {} + iso8859-3 E3 replace \uFFFD -1 {} {} + iso8859-3 E3 strict {} 0 {} {} + iso8859-3 F0 tcl8 \U000000F0 -1 {} {} + iso8859-3 F0 replace \uFFFD -1 {} {} + iso8859-3 F0 strict {} 0 {} {} +}; # iso8859-3 + +# iso8859-3 - invalid byte sequences +lappend encUnencodableStrings {*}{ + iso8859-3 \U000000A1 tcl8 1A -1 {} {} + iso8859-3 \U000000A1 replace 1A -1 {} {} + iso8859-3 \U000000A1 strict {} 0 {} {} + iso8859-3 \U00000400 tcl8 1A -1 {} {} + iso8859-3 \U00000400 replace 1A -1 {} {} + iso8859-3 \U00000400 strict {} 0 {} {} + iso8859-3 \U0000D800 tcl8 1A -1 {} {} + iso8859-3 \U0000D800 replace 1A -1 {} {} + iso8859-3 \U0000D800 strict {} 0 {} {} + iso8859-3 \U0000DC00 tcl8 1A -1 {} {} + iso8859-3 \U0000DC00 replace 1A -1 {} {} + iso8859-3 \U0000DC00 strict {} 0 {} {} + iso8859-3 \U00010000 tcl8 1A -1 {} {} + iso8859-3 \U00010000 replace 1A -1 {} {} + iso8859-3 \U00010000 strict {} 0 {} {} + iso8859-3 \U0010FFFF tcl8 1A -1 {} {} + iso8859-3 \U0010FFFF replace 1A -1 {} {} + iso8859-3 \U0010FFFF strict {} 0 {} {} +}; # iso8859-3 + +# +# iso8859-4 (generated from glibc-ISO_8859_4-2.1.2) + +test encoding-convertfrom-ucmCompare-iso8859-4 {Compare against ICU UCM} -body { + ucmConvertfromMismatches iso8859-4 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0024 24 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007E 7E 007F 7F 0080 80 0081 81 0082 82 0083 83 0084 84 0085 85 0086 86 0087 87 0088 88 0089 89 008A 8A 008B 8B 008C 8C 008D 8D 008E 8E 008F 8F 0090 90 0091 91 0092 92 0093 93 0094 94 0095 95 0096 96 0097 97 0098 98 0099 99 009A 9A 009B 9B 009C 9C 009D 9D 009E 9E 009F 9F 00A0 A0 00A4 A4 00A7 A7 00A8 A8 00AD AD 00AF AF 00B0 B0 00B4 B4 00B8 B8 00C1 C1 00C2 C2 00C3 C3 00C4 C4 00C5 C5 00C6 C6 00C9 C9 00CB CB 00CD CD 00CE CE 00D4 D4 00D5 D5 00D6 D6 00D7 D7 00D8 D8 00DA DA 00DB DB 00DC DC 00DF DF 00E1 E1 00E2 E2 00E3 E3 00E4 E4 00E5 E5 00E6 E6 00E9 E9 00EB EB 00ED ED 00EE EE 00F4 F4 00F5 F5 00F6 F6 00F7 F7 00F8 F8 00FA FA 00FB FB 00FC FC 0100 C0 0101 E0 0104 A1 0105 B1 010C C8 010D E8 0110 D0 0111 F0 0112 AA 0113 BA 0116 CC 0117 EC 0118 CA 0119 EA 0122 AB 0123 BB 0128 A5 0129 B5 012A CF 012B EF 012E C7 012F E7 0136 D3 0137 F3 0138 A2 013B A6 013C B6 0145 D1 0146 F1 014A BD 014B BF 014C D2 014D F2 0156 A3 0157 B3 0160 A9 0161 B9 0166 AC 0167 BC 0168 DD 0169 FD 016A DE 016B FE 0172 D9 0173 F9 017D AE 017E BE 02C7 B7 02D9 FF 02DB B2} +} -result {} + +test encoding-convertto-ucmCompare-iso8859-4 {Compare against ICU UCM} -body { + ucmConverttoMismatches iso8859-4 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0024 24 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007E 7E 007F 7F 0080 80 0081 81 0082 82 0083 83 0084 84 0085 85 0086 86 0087 87 0088 88 0089 89 008A 8A 008B 8B 008C 8C 008D 8D 008E 8E 008F 8F 0090 90 0091 91 0092 92 0093 93 0094 94 0095 95 0096 96 0097 97 0098 98 0099 99 009A 9A 009B 9B 009C 9C 009D 9D 009E 9E 009F 9F 00A0 A0 00A4 A4 00A7 A7 00A8 A8 00AD AD 00AF AF 00B0 B0 00B4 B4 00B8 B8 00C1 C1 00C2 C2 00C3 C3 00C4 C4 00C5 C5 00C6 C6 00C9 C9 00CB CB 00CD CD 00CE CE 00D4 D4 00D5 D5 00D6 D6 00D7 D7 00D8 D8 00DA DA 00DB DB 00DC DC 00DF DF 00E1 E1 00E2 E2 00E3 E3 00E4 E4 00E5 E5 00E6 E6 00E9 E9 00EB EB 00ED ED 00EE EE 00F4 F4 00F5 F5 00F6 F6 00F7 F7 00F8 F8 00FA FA 00FB FB 00FC FC 0100 C0 0101 E0 0104 A1 0105 B1 010C C8 010D E8 0110 D0 0111 F0 0112 AA 0113 BA 0116 CC 0117 EC 0118 CA 0119 EA 0122 AB 0123 BB 0128 A5 0129 B5 012A CF 012B EF 012E C7 012F E7 0136 D3 0137 F3 0138 A2 013B A6 013C B6 0145 D1 0146 F1 014A BD 014B BF 014C D2 014D F2 0156 A3 0157 B3 0160 A9 0161 B9 0166 AC 0167 BC 0168 DD 0169 FD 016A DE 016B FE 0172 D9 0173 F9 017D AE 017E BE 02C7 B7 02D9 FF 02DB B2} +} -result {} + +# iso8859-4 - invalid byte sequences +lappend encInvalidBytes {*}{ +}; # iso8859-4 + +# iso8859-4 - invalid byte sequences +lappend encUnencodableStrings {*}{ + iso8859-4 \U000000A1 tcl8 1A -1 {} {} + iso8859-4 \U000000A1 replace 1A -1 {} {} + iso8859-4 \U000000A1 strict {} 0 {} {} + iso8859-4 \U00000400 tcl8 1A -1 {} {} + iso8859-4 \U00000400 replace 1A -1 {} {} + iso8859-4 \U00000400 strict {} 0 {} {} + iso8859-4 \U0000D800 tcl8 1A -1 {} {} + iso8859-4 \U0000D800 replace 1A -1 {} {} + iso8859-4 \U0000D800 strict {} 0 {} {} + iso8859-4 \U0000DC00 tcl8 1A -1 {} {} + iso8859-4 \U0000DC00 replace 1A -1 {} {} + iso8859-4 \U0000DC00 strict {} 0 {} {} + iso8859-4 \U00010000 tcl8 1A -1 {} {} + iso8859-4 \U00010000 replace 1A -1 {} {} + iso8859-4 \U00010000 strict {} 0 {} {} + iso8859-4 \U0010FFFF tcl8 1A -1 {} {} + iso8859-4 \U0010FFFF replace 1A -1 {} {} + iso8859-4 \U0010FFFF strict {} 0 {} {} +}; # iso8859-4 + +# +# iso8859-5 (generated from glibc-ISO_8859_5-2.1.2) + +test encoding-convertfrom-ucmCompare-iso8859-5 {Compare against ICU UCM} -body { + ucmConvertfromMismatches iso8859-5 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0024 24 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007E 7E 007F 7F 0080 80 0081 81 0082 82 0083 83 0084 84 0085 85 0086 86 0087 87 0088 88 0089 89 008A 8A 008B 8B 008C 8C 008D 8D 008E 8E 008F 8F 0090 90 0091 91 0092 92 0093 93 0094 94 0095 95 0096 96 0097 97 0098 98 0099 99 009A 9A 009B 9B 009C 9C 009D 9D 009E 9E 009F 9F 00A0 A0 00A7 FD 00AD AD 0401 A1 0402 A2 0403 A3 0404 A4 0405 A5 0406 A6 0407 A7 0408 A8 0409 A9 040A AA 040B AB 040C AC 040E AE 040F AF 0410 B0 0411 B1 0412 B2 0413 B3 0414 B4 0415 B5 0416 B6 0417 B7 0418 B8 0419 B9 041A BA 041B BB 041C BC 041D BD 041E BE 041F BF 0420 C0 0421 C1 0422 C2 0423 C3 0424 C4 0425 C5 0426 C6 0427 C7 0428 C8 0429 C9 042A CA 042B CB 042C CC 042D CD 042E CE 042F CF 0430 D0 0431 D1 0432 D2 0433 D3 0434 D4 0435 D5 0436 D6 0437 D7 0438 D8 0439 D9 043A DA 043B DB 043C DC 043D DD 043E DE 043F DF 0440 E0 0441 E1 0442 E2 0443 E3 0444 E4 0445 E5 0446 E6 0447 E7 0448 E8 0449 E9 044A EA 044B EB 044C EC 044D ED 044E EE 044F EF 0451 F1 0452 F2 0453 F3 0454 F4 0455 F5 0456 F6 0457 F7 0458 F8 0459 F9 045A FA 045B FB 045C FC 045E FE 045F FF 2116 F0} +} -result {} + +test encoding-convertto-ucmCompare-iso8859-5 {Compare against ICU UCM} -body { + ucmConverttoMismatches iso8859-5 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0024 24 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007E 7E 007F 7F 0080 80 0081 81 0082 82 0083 83 0084 84 0085 85 0086 86 0087 87 0088 88 0089 89 008A 8A 008B 8B 008C 8C 008D 8D 008E 8E 008F 8F 0090 90 0091 91 0092 92 0093 93 0094 94 0095 95 0096 96 0097 97 0098 98 0099 99 009A 9A 009B 9B 009C 9C 009D 9D 009E 9E 009F 9F 00A0 A0 00A7 FD 00AD AD 0401 A1 0402 A2 0403 A3 0404 A4 0405 A5 0406 A6 0407 A7 0408 A8 0409 A9 040A AA 040B AB 040C AC 040E AE 040F AF 0410 B0 0411 B1 0412 B2 0413 B3 0414 B4 0415 B5 0416 B6 0417 B7 0418 B8 0419 B9 041A BA 041B BB 041C BC 041D BD 041E BE 041F BF 0420 C0 0421 C1 0422 C2 0423 C3 0424 C4 0425 C5 0426 C6 0427 C7 0428 C8 0429 C9 042A CA 042B CB 042C CC 042D CD 042E CE 042F CF 0430 D0 0431 D1 0432 D2 0433 D3 0434 D4 0435 D5 0436 D6 0437 D7 0438 D8 0439 D9 043A DA 043B DB 043C DC 043D DD 043E DE 043F DF 0440 E0 0441 E1 0442 E2 0443 E3 0444 E4 0445 E5 0446 E6 0447 E7 0448 E8 0449 E9 044A EA 044B EB 044C EC 044D ED 044E EE 044F EF 0451 F1 0452 F2 0453 F3 0454 F4 0455 F5 0456 F6 0457 F7 0458 F8 0459 F9 045A FA 045B FB 045C FC 045E FE 045F FF 2116 F0} +} -result {} + +# iso8859-5 - invalid byte sequences +lappend encInvalidBytes {*}{ +}; # iso8859-5 + +# iso8859-5 - invalid byte sequences +lappend encUnencodableStrings {*}{ + iso8859-5 \U000000A1 tcl8 1A -1 {} {} + iso8859-5 \U000000A1 replace 1A -1 {} {} + iso8859-5 \U000000A1 strict {} 0 {} {} + iso8859-5 \U00000400 tcl8 1A -1 {} {} + iso8859-5 \U00000400 replace 1A -1 {} {} + iso8859-5 \U00000400 strict {} 0 {} {} + iso8859-5 \U0000D800 tcl8 1A -1 {} {} + iso8859-5 \U0000D800 replace 1A -1 {} {} + iso8859-5 \U0000D800 strict {} 0 {} {} + iso8859-5 \U0000DC00 tcl8 1A -1 {} {} + iso8859-5 \U0000DC00 replace 1A -1 {} {} + iso8859-5 \U0000DC00 strict {} 0 {} {} + iso8859-5 \U00010000 tcl8 1A -1 {} {} + iso8859-5 \U00010000 replace 1A -1 {} {} + iso8859-5 \U00010000 strict {} 0 {} {} + iso8859-5 \U0010FFFF tcl8 1A -1 {} {} + iso8859-5 \U0010FFFF replace 1A -1 {} {} + iso8859-5 \U0010FFFF strict {} 0 {} {} +}; # iso8859-5 + +# +# iso8859-6 (generated from glibc-ISO_8859_6-2.1.2) + +test encoding-convertfrom-ucmCompare-iso8859-6 {Compare against ICU UCM} -body { + ucmConvertfromMismatches iso8859-6 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0024 24 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007E 7E 007F 7F 0080 80 0081 81 0082 82 0083 83 0084 84 0085 85 0086 86 0087 87 0088 88 0089 89 008A 8A 008B 8B 008C 8C 008D 8D 008E 8E 008F 8F 0090 90 0091 91 0092 92 0093 93 0094 94 0095 95 0096 96 0097 97 0098 98 0099 99 009A 9A 009B 9B 009C 9C 009D 9D 009E 9E 009F 9F 00A0 A0 00A4 A4 00AD AD 060C AC 061B BB 061F BF 0621 C1 0622 C2 0623 C3 0624 C4 0625 C5 0626 C6 0627 C7 0628 C8 0629 C9 062A CA 062B CB 062C CC 062D CD 062E CE 062F CF 0630 D0 0631 D1 0632 D2 0633 D3 0634 D4 0635 D5 0636 D6 0637 D7 0638 D8 0639 D9 063A DA 0640 E0 0641 E1 0642 E2 0643 E3 0644 E4 0645 E5 0646 E6 0647 E7 0648 E8 0649 E9 064A EA 064B EB 064C EC 064D ED 064E EE 064F EF 0650 F0 0651 F1 0652 F2} +} -result {} + +test encoding-convertto-ucmCompare-iso8859-6 {Compare against ICU UCM} -body { + ucmConverttoMismatches iso8859-6 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0024 24 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007E 7E 007F 7F 0080 80 0081 81 0082 82 0083 83 0084 84 0085 85 0086 86 0087 87 0088 88 0089 89 008A 8A 008B 8B 008C 8C 008D 8D 008E 8E 008F 8F 0090 90 0091 91 0092 92 0093 93 0094 94 0095 95 0096 96 0097 97 0098 98 0099 99 009A 9A 009B 9B 009C 9C 009D 9D 009E 9E 009F 9F 00A0 A0 00A4 A4 00AD AD 060C AC 061B BB 061F BF 0621 C1 0622 C2 0623 C3 0624 C4 0625 C5 0626 C6 0627 C7 0628 C8 0629 C9 062A CA 062B CB 062C CC 062D CD 062E CE 062F CF 0630 D0 0631 D1 0632 D2 0633 D3 0634 D4 0635 D5 0636 D6 0637 D7 0638 D8 0639 D9 063A DA 0640 E0 0641 E1 0642 E2 0643 E3 0644 E4 0645 E5 0646 E6 0647 E7 0648 E8 0649 E9 064A EA 064B EB 064C EC 064D ED 064E EE 064F EF 0650 F0 0651 F1 0652 F2} +} -result {} + +# iso8859-6 - invalid byte sequences +lappend encInvalidBytes {*}{ + iso8859-6 A1 tcl8 \U000000A1 -1 {} {} + iso8859-6 A1 replace \uFFFD -1 {} {} + iso8859-6 A1 strict {} 0 {} {} + iso8859-6 A2 tcl8 \U000000A2 -1 {} {} + iso8859-6 A2 replace \uFFFD -1 {} {} + iso8859-6 A2 strict {} 0 {} {} + iso8859-6 A3 tcl8 \U000000A3 -1 {} {} + iso8859-6 A3 replace \uFFFD -1 {} {} + iso8859-6 A3 strict {} 0 {} {} + iso8859-6 A5 tcl8 \U000000A5 -1 {} {} + iso8859-6 A5 replace \uFFFD -1 {} {} + iso8859-6 A5 strict {} 0 {} {} + iso8859-6 A6 tcl8 \U000000A6 -1 {} {} + iso8859-6 A6 replace \uFFFD -1 {} {} + iso8859-6 A6 strict {} 0 {} {} + iso8859-6 A7 tcl8 \U000000A7 -1 {} {} + iso8859-6 A7 replace \uFFFD -1 {} {} + iso8859-6 A7 strict {} 0 {} {} + iso8859-6 A8 tcl8 \U000000A8 -1 {} {} + iso8859-6 A8 replace \uFFFD -1 {} {} + iso8859-6 A8 strict {} 0 {} {} + iso8859-6 A9 tcl8 \U000000A9 -1 {} {} + iso8859-6 A9 replace \uFFFD -1 {} {} + iso8859-6 A9 strict {} 0 {} {} + iso8859-6 AA tcl8 \U000000AA -1 {} {} + iso8859-6 AA replace \uFFFD -1 {} {} + iso8859-6 AA strict {} 0 {} {} + iso8859-6 AB tcl8 \U000000AB -1 {} {} + iso8859-6 AB replace \uFFFD -1 {} {} + iso8859-6 AB strict {} 0 {} {} + iso8859-6 AE tcl8 \U000000AE -1 {} {} + iso8859-6 AE replace \uFFFD -1 {} {} + iso8859-6 AE strict {} 0 {} {} + iso8859-6 AF tcl8 \U000000AF -1 {} {} + iso8859-6 AF replace \uFFFD -1 {} {} + iso8859-6 AF strict {} 0 {} {} + iso8859-6 B0 tcl8 \U000000B0 -1 {} {} + iso8859-6 B0 replace \uFFFD -1 {} {} + iso8859-6 B0 strict {} 0 {} {} + iso8859-6 B1 tcl8 \U000000B1 -1 {} {} + iso8859-6 B1 replace \uFFFD -1 {} {} + iso8859-6 B1 strict {} 0 {} {} + iso8859-6 B2 tcl8 \U000000B2 -1 {} {} + iso8859-6 B2 replace \uFFFD -1 {} {} + iso8859-6 B2 strict {} 0 {} {} + iso8859-6 B3 tcl8 \U000000B3 -1 {} {} + iso8859-6 B3 replace \uFFFD -1 {} {} + iso8859-6 B3 strict {} 0 {} {} + iso8859-6 B4 tcl8 \U000000B4 -1 {} {} + iso8859-6 B4 replace \uFFFD -1 {} {} + iso8859-6 B4 strict {} 0 {} {} + iso8859-6 B5 tcl8 \U000000B5 -1 {} {} + iso8859-6 B5 replace \uFFFD -1 {} {} + iso8859-6 B5 strict {} 0 {} {} + iso8859-6 B6 tcl8 \U000000B6 -1 {} {} + iso8859-6 B6 replace \uFFFD -1 {} {} + iso8859-6 B6 strict {} 0 {} {} + iso8859-6 B7 tcl8 \U000000B7 -1 {} {} + iso8859-6 B7 replace \uFFFD -1 {} {} + iso8859-6 B7 strict {} 0 {} {} + iso8859-6 B8 tcl8 \U000000B8 -1 {} {} + iso8859-6 B8 replace \uFFFD -1 {} {} + iso8859-6 B8 strict {} 0 {} {} + iso8859-6 B9 tcl8 \U000000B9 -1 {} {} + iso8859-6 B9 replace \uFFFD -1 {} {} + iso8859-6 B9 strict {} 0 {} {} + iso8859-6 BA tcl8 \U000000BA -1 {} {} + iso8859-6 BA replace \uFFFD -1 {} {} + iso8859-6 BA strict {} 0 {} {} + iso8859-6 BC tcl8 \U000000BC -1 {} {} + iso8859-6 BC replace \uFFFD -1 {} {} + iso8859-6 BC strict {} 0 {} {} + iso8859-6 BD tcl8 \U000000BD -1 {} {} + iso8859-6 BD replace \uFFFD -1 {} {} + iso8859-6 BD strict {} 0 {} {} + iso8859-6 BE tcl8 \U000000BE -1 {} {} + iso8859-6 BE replace \uFFFD -1 {} {} + iso8859-6 BE strict {} 0 {} {} + iso8859-6 C0 tcl8 \U000000C0 -1 {} {} + iso8859-6 C0 replace \uFFFD -1 {} {} + iso8859-6 C0 strict {} 0 {} {} + iso8859-6 DB tcl8 \U000000DB -1 {} {} + iso8859-6 DB replace \uFFFD -1 {} {} + iso8859-6 DB strict {} 0 {} {} + iso8859-6 DC tcl8 \U000000DC -1 {} {} + iso8859-6 DC replace \uFFFD -1 {} {} + iso8859-6 DC strict {} 0 {} {} + iso8859-6 DD tcl8 \U000000DD -1 {} {} + iso8859-6 DD replace \uFFFD -1 {} {} + iso8859-6 DD strict {} 0 {} {} + iso8859-6 DE tcl8 \U000000DE -1 {} {} + iso8859-6 DE replace \uFFFD -1 {} {} + iso8859-6 DE strict {} 0 {} {} + iso8859-6 DF tcl8 \U000000DF -1 {} {} + iso8859-6 DF replace \uFFFD -1 {} {} + iso8859-6 DF strict {} 0 {} {} + iso8859-6 F3 tcl8 \U000000F3 -1 {} {} + iso8859-6 F3 replace \uFFFD -1 {} {} + iso8859-6 F3 strict {} 0 {} {} + iso8859-6 F4 tcl8 \U000000F4 -1 {} {} + iso8859-6 F4 replace \uFFFD -1 {} {} + iso8859-6 F4 strict {} 0 {} {} + iso8859-6 F5 tcl8 \U000000F5 -1 {} {} + iso8859-6 F5 replace \uFFFD -1 {} {} + iso8859-6 F5 strict {} 0 {} {} + iso8859-6 F6 tcl8 \U000000F6 -1 {} {} + iso8859-6 F6 replace \uFFFD -1 {} {} + iso8859-6 F6 strict {} 0 {} {} + iso8859-6 F7 tcl8 \U000000F7 -1 {} {} + iso8859-6 F7 replace \uFFFD -1 {} {} + iso8859-6 F7 strict {} 0 {} {} + iso8859-6 F8 tcl8 \U000000F8 -1 {} {} + iso8859-6 F8 replace \uFFFD -1 {} {} + iso8859-6 F8 strict {} 0 {} {} + iso8859-6 F9 tcl8 \U000000F9 -1 {} {} + iso8859-6 F9 replace \uFFFD -1 {} {} + iso8859-6 F9 strict {} 0 {} {} + iso8859-6 FA tcl8 \U000000FA -1 {} {} + iso8859-6 FA replace \uFFFD -1 {} {} + iso8859-6 FA strict {} 0 {} {} + iso8859-6 FB tcl8 \U000000FB -1 {} {} + iso8859-6 FB replace \uFFFD -1 {} {} + iso8859-6 FB strict {} 0 {} {} + iso8859-6 FC tcl8 \U000000FC -1 {} {} + iso8859-6 FC replace \uFFFD -1 {} {} + iso8859-6 FC strict {} 0 {} {} + iso8859-6 FD tcl8 \U000000FD -1 {} {} + iso8859-6 FD replace \uFFFD -1 {} {} + iso8859-6 FD strict {} 0 {} {} + iso8859-6 FE tcl8 \U000000FE -1 {} {} + iso8859-6 FE replace \uFFFD -1 {} {} + iso8859-6 FE strict {} 0 {} {} + iso8859-6 FF tcl8 \U000000FF -1 {} {} + iso8859-6 FF replace \uFFFD -1 {} {} + iso8859-6 FF strict {} 0 {} {} +}; # iso8859-6 + +# iso8859-6 - invalid byte sequences +lappend encUnencodableStrings {*}{ + iso8859-6 \U000000A1 tcl8 1A -1 {} {} + iso8859-6 \U000000A1 replace 1A -1 {} {} + iso8859-6 \U000000A1 strict {} 0 {} {} + iso8859-6 \U00000400 tcl8 1A -1 {} {} + iso8859-6 \U00000400 replace 1A -1 {} {} + iso8859-6 \U00000400 strict {} 0 {} {} + iso8859-6 \U0000D800 tcl8 1A -1 {} {} + iso8859-6 \U0000D800 replace 1A -1 {} {} + iso8859-6 \U0000D800 strict {} 0 {} {} + iso8859-6 \U0000DC00 tcl8 1A -1 {} {} + iso8859-6 \U0000DC00 replace 1A -1 {} {} + iso8859-6 \U0000DC00 strict {} 0 {} {} + iso8859-6 \U00010000 tcl8 1A -1 {} {} + iso8859-6 \U00010000 replace 1A -1 {} {} + iso8859-6 \U00010000 strict {} 0 {} {} + iso8859-6 \U0010FFFF tcl8 1A -1 {} {} + iso8859-6 \U0010FFFF replace 1A -1 {} {} + iso8859-6 \U0010FFFF strict {} 0 {} {} +}; # iso8859-6 + +# +# iso8859-7 (generated from glibc-ISO_8859_7-2.3.3) + +test encoding-convertfrom-ucmCompare-iso8859-7 {Compare against ICU UCM} -body { + ucmConvertfromMismatches iso8859-7 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0024 24 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007E 7E 007F 7F 0080 80 0081 81 0082 82 0083 83 0084 84 0085 85 0086 86 0087 87 0088 88 0089 89 008A 8A 008B 8B 008C 8C 008D 8D 008E 8E 008F 8F 0090 90 0091 91 0092 92 0093 93 0094 94 0095 95 0096 96 0097 97 0098 98 0099 99 009A 9A 009B 9B 009C 9C 009D 9D 009E 9E 009F 9F 00A0 A0 00A3 A3 00A6 A6 00A7 A7 00A8 A8 00A9 A9 00AB AB 00AC AC 00AD AD 00B0 B0 00B1 B1 00B2 B2 00B3 B3 00B7 B7 00BB BB 00BD BD 037A AA 0384 B4 0385 B5 0386 B6 0388 B8 0389 B9 038A BA 038C BC 038E BE 038F BF 0390 C0 0391 C1 0392 C2 0393 C3 0394 C4 0395 C5 0396 C6 0397 C7 0398 C8 0399 C9 039A CA 039B CB 039C CC 039D CD 039E CE 039F CF 03A0 D0 03A1 D1 03A3 D3 03A4 D4 03A5 D5 03A6 D6 03A7 D7 03A8 D8 03A9 D9 03AA DA 03AB DB 03AC DC 03AD DD 03AE DE 03AF DF 03B0 E0 03B1 E1 03B2 E2 03B3 E3 03B4 E4 03B5 E5 03B6 E6 03B7 E7 03B8 E8 03B9 E9 03BA EA 03BB EB 03BC EC 03BD ED 03BE EE 03BF EF 03C0 F0 03C1 F1 03C2 F2 03C3 F3 03C4 F4 03C5 F5 03C6 F6 03C7 F7 03C8 F8 03C9 F9 03CA FA 03CB FB 03CC FC 03CD FD 03CE FE 2015 AF 2018 A1 2019 A2 20AC A4 20AF A5} +} -result {} + +test encoding-convertto-ucmCompare-iso8859-7 {Compare against ICU UCM} -body { + ucmConverttoMismatches iso8859-7 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0024 24 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007E 7E 007F 7F 0080 80 0081 81 0082 82 0083 83 0084 84 0085 85 0086 86 0087 87 0088 88 0089 89 008A 8A 008B 8B 008C 8C 008D 8D 008E 8E 008F 8F 0090 90 0091 91 0092 92 0093 93 0094 94 0095 95 0096 96 0097 97 0098 98 0099 99 009A 9A 009B 9B 009C 9C 009D 9D 009E 9E 009F 9F 00A0 A0 00A3 A3 00A6 A6 00A7 A7 00A8 A8 00A9 A9 00AB AB 00AC AC 00AD AD 00B0 B0 00B1 B1 00B2 B2 00B3 B3 00B7 B7 00BB BB 00BD BD 037A AA 0384 B4 0385 B5 0386 B6 0388 B8 0389 B9 038A BA 038C BC 038E BE 038F BF 0390 C0 0391 C1 0392 C2 0393 C3 0394 C4 0395 C5 0396 C6 0397 C7 0398 C8 0399 C9 039A CA 039B CB 039C CC 039D CD 039E CE 039F CF 03A0 D0 03A1 D1 03A3 D3 03A4 D4 03A5 D5 03A6 D6 03A7 D7 03A8 D8 03A9 D9 03AA DA 03AB DB 03AC DC 03AD DD 03AE DE 03AF DF 03B0 E0 03B1 E1 03B2 E2 03B3 E3 03B4 E4 03B5 E5 03B6 E6 03B7 E7 03B8 E8 03B9 E9 03BA EA 03BB EB 03BC EC 03BD ED 03BE EE 03BF EF 03C0 F0 03C1 F1 03C2 F2 03C3 F3 03C4 F4 03C5 F5 03C6 F6 03C7 F7 03C8 F8 03C9 F9 03CA FA 03CB FB 03CC FC 03CD FD 03CE FE 2015 AF 2018 A1 2019 A2 20AC A4 20AF A5} +} -result {} + +# iso8859-7 - invalid byte sequences +lappend encInvalidBytes {*}{ + iso8859-7 AE tcl8 \U000000AE -1 {} {} + iso8859-7 AE replace \uFFFD -1 {} {} + iso8859-7 AE strict {} 0 {} {} + iso8859-7 D2 tcl8 \U000000D2 -1 {} {} + iso8859-7 D2 replace \uFFFD -1 {} {} + iso8859-7 D2 strict {} 0 {} {} + iso8859-7 FF tcl8 \U000000FF -1 {} {} + iso8859-7 FF replace \uFFFD -1 {} {} + iso8859-7 FF strict {} 0 {} {} +}; # iso8859-7 + +# iso8859-7 - invalid byte sequences +lappend encUnencodableStrings {*}{ + iso8859-7 \U000000A1 tcl8 1A -1 {} {} + iso8859-7 \U000000A1 replace 1A -1 {} {} + iso8859-7 \U000000A1 strict {} 0 {} {} + iso8859-7 \U00000400 tcl8 1A -1 {} {} + iso8859-7 \U00000400 replace 1A -1 {} {} + iso8859-7 \U00000400 strict {} 0 {} {} + iso8859-7 \U0000D800 tcl8 1A -1 {} {} + iso8859-7 \U0000D800 replace 1A -1 {} {} + iso8859-7 \U0000D800 strict {} 0 {} {} + iso8859-7 \U0000DC00 tcl8 1A -1 {} {} + iso8859-7 \U0000DC00 replace 1A -1 {} {} + iso8859-7 \U0000DC00 strict {} 0 {} {} + iso8859-7 \U00010000 tcl8 1A -1 {} {} + iso8859-7 \U00010000 replace 1A -1 {} {} + iso8859-7 \U00010000 strict {} 0 {} {} + iso8859-7 \U0010FFFF tcl8 1A -1 {} {} + iso8859-7 \U0010FFFF replace 1A -1 {} {} + iso8859-7 \U0010FFFF strict {} 0 {} {} +}; # iso8859-7 + +# +# iso8859-8 (generated from glibc-ISO_8859_8-2.3.3) + +test encoding-convertfrom-ucmCompare-iso8859-8 {Compare against ICU UCM} -body { + ucmConvertfromMismatches iso8859-8 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0024 24 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007E 7E 007F 7F 0080 80 0081 81 0082 82 0083 83 0084 84 0085 85 0086 86 0087 87 0088 88 0089 89 008A 8A 008B 8B 008C 8C 008D 8D 008E 8E 008F 8F 0090 90 0091 91 0092 92 0093 93 0094 94 0095 95 0096 96 0097 97 0098 98 0099 99 009A 9A 009B 9B 009C 9C 009D 9D 009E 9E 009F 9F 00A0 A0 00A2 A2 00A3 A3 00A4 A4 00A5 A5 00A6 A6 00A7 A7 00A8 A8 00A9 A9 00AB AB 00AC AC 00AD AD 00AE AE 00AF AF 00B0 B0 00B1 B1 00B2 B2 00B3 B3 00B4 B4 00B5 B5 00B6 B6 00B7 B7 00B8 B8 00B9 B9 00BB BB 00BC BC 00BD BD 00BE BE 00D7 AA 00F7 BA 05D0 E0 05D1 E1 05D2 E2 05D3 E3 05D4 E4 05D5 E5 05D6 E6 05D7 E7 05D8 E8 05D9 E9 05DA EA 05DB EB 05DC EC 05DD ED 05DE EE 05DF EF 05E0 F0 05E1 F1 05E2 F2 05E3 F3 05E4 F4 05E5 F5 05E6 F6 05E7 F7 05E8 F8 05E9 F9 05EA FA 200E FD 200F FE 2017 DF} +} -result {} + +test encoding-convertto-ucmCompare-iso8859-8 {Compare against ICU UCM} -body { + ucmConverttoMismatches iso8859-8 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0024 24 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007E 7E 007F 7F 0080 80 0081 81 0082 82 0083 83 0084 84 0085 85 0086 86 0087 87 0088 88 0089 89 008A 8A 008B 8B 008C 8C 008D 8D 008E 8E 008F 8F 0090 90 0091 91 0092 92 0093 93 0094 94 0095 95 0096 96 0097 97 0098 98 0099 99 009A 9A 009B 9B 009C 9C 009D 9D 009E 9E 009F 9F 00A0 A0 00A2 A2 00A3 A3 00A4 A4 00A5 A5 00A6 A6 00A7 A7 00A8 A8 00A9 A9 00AB AB 00AC AC 00AD AD 00AE AE 00AF AF 00B0 B0 00B1 B1 00B2 B2 00B3 B3 00B4 B4 00B5 B5 00B6 B6 00B7 B7 00B8 B8 00B9 B9 00BB BB 00BC BC 00BD BD 00BE BE 00D7 AA 00F7 BA 05D0 E0 05D1 E1 05D2 E2 05D3 E3 05D4 E4 05D5 E5 05D6 E6 05D7 E7 05D8 E8 05D9 E9 05DA EA 05DB EB 05DC EC 05DD ED 05DE EE 05DF EF 05E0 F0 05E1 F1 05E2 F2 05E3 F3 05E4 F4 05E5 F5 05E6 F6 05E7 F7 05E8 F8 05E9 F9 05EA FA 200E FD 200F FE 2017 DF} +} -result {} + +# iso8859-8 - invalid byte sequences +lappend encInvalidBytes {*}{ + iso8859-8 A1 tcl8 \U000000A1 -1 {} {} + iso8859-8 A1 replace \uFFFD -1 {} {} + iso8859-8 A1 strict {} 0 {} {} + iso8859-8 BF tcl8 \U000000BF -1 {} {} + iso8859-8 BF replace \uFFFD -1 {} {} + iso8859-8 BF strict {} 0 {} {} + iso8859-8 C0 tcl8 \U000000C0 -1 {} {} + iso8859-8 C0 replace \uFFFD -1 {} {} + iso8859-8 C0 strict {} 0 {} {} + iso8859-8 C1 tcl8 \U000000C1 -1 {} {} + iso8859-8 C1 replace \uFFFD -1 {} {} + iso8859-8 C1 strict {} 0 {} {} + iso8859-8 C2 tcl8 \U000000C2 -1 {} {} + iso8859-8 C2 replace \uFFFD -1 {} {} + iso8859-8 C2 strict {} 0 {} {} + iso8859-8 C3 tcl8 \U000000C3 -1 {} {} + iso8859-8 C3 replace \uFFFD -1 {} {} + iso8859-8 C3 strict {} 0 {} {} + iso8859-8 C4 tcl8 \U000000C4 -1 {} {} + iso8859-8 C4 replace \uFFFD -1 {} {} + iso8859-8 C4 strict {} 0 {} {} + iso8859-8 C5 tcl8 \U000000C5 -1 {} {} + iso8859-8 C5 replace \uFFFD -1 {} {} + iso8859-8 C5 strict {} 0 {} {} + iso8859-8 C6 tcl8 \U000000C6 -1 {} {} + iso8859-8 C6 replace \uFFFD -1 {} {} + iso8859-8 C6 strict {} 0 {} {} + iso8859-8 C7 tcl8 \U000000C7 -1 {} {} + iso8859-8 C7 replace \uFFFD -1 {} {} + iso8859-8 C7 strict {} 0 {} {} + iso8859-8 C8 tcl8 \U000000C8 -1 {} {} + iso8859-8 C8 replace \uFFFD -1 {} {} + iso8859-8 C8 strict {} 0 {} {} + iso8859-8 C9 tcl8 \U000000C9 -1 {} {} + iso8859-8 C9 replace \uFFFD -1 {} {} + iso8859-8 C9 strict {} 0 {} {} + iso8859-8 CA tcl8 \U000000CA -1 {} {} + iso8859-8 CA replace \uFFFD -1 {} {} + iso8859-8 CA strict {} 0 {} {} + iso8859-8 CB tcl8 \U000000CB -1 {} {} + iso8859-8 CB replace \uFFFD -1 {} {} + iso8859-8 CB strict {} 0 {} {} + iso8859-8 CC tcl8 \U000000CC -1 {} {} + iso8859-8 CC replace \uFFFD -1 {} {} + iso8859-8 CC strict {} 0 {} {} + iso8859-8 CD tcl8 \U000000CD -1 {} {} + iso8859-8 CD replace \uFFFD -1 {} {} + iso8859-8 CD strict {} 0 {} {} + iso8859-8 CE tcl8 \U000000CE -1 {} {} + iso8859-8 CE replace \uFFFD -1 {} {} + iso8859-8 CE strict {} 0 {} {} + iso8859-8 CF tcl8 \U000000CF -1 {} {} + iso8859-8 CF replace \uFFFD -1 {} {} + iso8859-8 CF strict {} 0 {} {} + iso8859-8 D0 tcl8 \U000000D0 -1 {} {} + iso8859-8 D0 replace \uFFFD -1 {} {} + iso8859-8 D0 strict {} 0 {} {} + iso8859-8 D1 tcl8 \U000000D1 -1 {} {} + iso8859-8 D1 replace \uFFFD -1 {} {} + iso8859-8 D1 strict {} 0 {} {} + iso8859-8 D2 tcl8 \U000000D2 -1 {} {} + iso8859-8 D2 replace \uFFFD -1 {} {} + iso8859-8 D2 strict {} 0 {} {} + iso8859-8 D3 tcl8 \U000000D3 -1 {} {} + iso8859-8 D3 replace \uFFFD -1 {} {} + iso8859-8 D3 strict {} 0 {} {} + iso8859-8 D4 tcl8 \U000000D4 -1 {} {} + iso8859-8 D4 replace \uFFFD -1 {} {} + iso8859-8 D4 strict {} 0 {} {} + iso8859-8 D5 tcl8 \U000000D5 -1 {} {} + iso8859-8 D5 replace \uFFFD -1 {} {} + iso8859-8 D5 strict {} 0 {} {} + iso8859-8 D6 tcl8 \U000000D6 -1 {} {} + iso8859-8 D6 replace \uFFFD -1 {} {} + iso8859-8 D6 strict {} 0 {} {} + iso8859-8 D7 tcl8 \U000000D7 -1 {} {} + iso8859-8 D7 replace \uFFFD -1 {} {} + iso8859-8 D7 strict {} 0 {} {} + iso8859-8 D8 tcl8 \U000000D8 -1 {} {} + iso8859-8 D8 replace \uFFFD -1 {} {} + iso8859-8 D8 strict {} 0 {} {} + iso8859-8 D9 tcl8 \U000000D9 -1 {} {} + iso8859-8 D9 replace \uFFFD -1 {} {} + iso8859-8 D9 strict {} 0 {} {} + iso8859-8 DA tcl8 \U000000DA -1 {} {} + iso8859-8 DA replace \uFFFD -1 {} {} + iso8859-8 DA strict {} 0 {} {} + iso8859-8 DB tcl8 \U000000DB -1 {} {} + iso8859-8 DB replace \uFFFD -1 {} {} + iso8859-8 DB strict {} 0 {} {} + iso8859-8 DC tcl8 \U000000DC -1 {} {} + iso8859-8 DC replace \uFFFD -1 {} {} + iso8859-8 DC strict {} 0 {} {} + iso8859-8 DD tcl8 \U000000DD -1 {} {} + iso8859-8 DD replace \uFFFD -1 {} {} + iso8859-8 DD strict {} 0 {} {} + iso8859-8 DE tcl8 \U000000DE -1 {} {} + iso8859-8 DE replace \uFFFD -1 {} {} + iso8859-8 DE strict {} 0 {} {} + iso8859-8 FB tcl8 \U000000FB -1 {} {} + iso8859-8 FB replace \uFFFD -1 {} {} + iso8859-8 FB strict {} 0 {} {} + iso8859-8 FC tcl8 \U000000FC -1 {} {} + iso8859-8 FC replace \uFFFD -1 {} {} + iso8859-8 FC strict {} 0 {} {} + iso8859-8 FF tcl8 \U000000FF -1 {} {} + iso8859-8 FF replace \uFFFD -1 {} {} + iso8859-8 FF strict {} 0 {} {} +}; # iso8859-8 + +# iso8859-8 - invalid byte sequences +lappend encUnencodableStrings {*}{ + iso8859-8 \U000000A1 tcl8 1A -1 {} {} + iso8859-8 \U000000A1 replace 1A -1 {} {} + iso8859-8 \U000000A1 strict {} 0 {} {} + iso8859-8 \U00000400 tcl8 1A -1 {} {} + iso8859-8 \U00000400 replace 1A -1 {} {} + iso8859-8 \U00000400 strict {} 0 {} {} + iso8859-8 \U0000D800 tcl8 1A -1 {} {} + iso8859-8 \U0000D800 replace 1A -1 {} {} + iso8859-8 \U0000D800 strict {} 0 {} {} + iso8859-8 \U0000DC00 tcl8 1A -1 {} {} + iso8859-8 \U0000DC00 replace 1A -1 {} {} + iso8859-8 \U0000DC00 strict {} 0 {} {} + iso8859-8 \U00010000 tcl8 1A -1 {} {} + iso8859-8 \U00010000 replace 1A -1 {} {} + iso8859-8 \U00010000 strict {} 0 {} {} + iso8859-8 \U0010FFFF tcl8 1A -1 {} {} + iso8859-8 \U0010FFFF replace 1A -1 {} {} + iso8859-8 \U0010FFFF strict {} 0 {} {} +}; # iso8859-8 + +# +# iso8859-9 (generated from glibc-ISO_8859_9-2.1.2) + +test encoding-convertfrom-ucmCompare-iso8859-9 {Compare against ICU UCM} -body { + ucmConvertfromMismatches iso8859-9 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0024 24 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007E 7E 007F 7F 0080 80 0081 81 0082 82 0083 83 0084 84 0085 85 0086 86 0087 87 0088 88 0089 89 008A 8A 008B 8B 008C 8C 008D 8D 008E 8E 008F 8F 0090 90 0091 91 0092 92 0093 93 0094 94 0095 95 0096 96 0097 97 0098 98 0099 99 009A 9A 009B 9B 009C 9C 009D 9D 009E 9E 009F 9F 00A0 A0 00A1 A1 00A2 A2 00A3 A3 00A4 A4 00A5 A5 00A6 A6 00A7 A7 00A8 A8 00A9 A9 00AA AA 00AB AB 00AC AC 00AD AD 00AE AE 00AF AF 00B0 B0 00B1 B1 00B2 B2 00B3 B3 00B4 B4 00B5 B5 00B6 B6 00B7 B7 00B8 B8 00B9 B9 00BA BA 00BB BB 00BC BC 00BD BD 00BE BE 00BF BF 00C0 C0 00C1 C1 00C2 C2 00C3 C3 00C4 C4 00C5 C5 00C6 C6 00C7 C7 00C8 C8 00C9 C9 00CA CA 00CB CB 00CC CC 00CD CD 00CE CE 00CF CF 00D1 D1 00D2 D2 00D3 D3 00D4 D4 00D5 D5 00D6 D6 00D7 D7 00D8 D8 00D9 D9 00DA DA 00DB DB 00DC DC 00DF DF 00E0 E0 00E1 E1 00E2 E2 00E3 E3 00E4 E4 00E5 E5 00E6 E6 00E7 E7 00E8 E8 00E9 E9 00EA EA 00EB EB 00EC EC 00ED ED 00EE EE 00EF EF 00F1 F1 00F2 F2 00F3 F3 00F4 F4 00F5 F5 00F6 F6 00F7 F7 00F8 F8 00F9 F9 00FA FA 00FB FB 00FC FC 00FF FF 011E D0 011F F0 0130 DD 0131 FD 015E DE 015F FE} +} -result {} + +test encoding-convertto-ucmCompare-iso8859-9 {Compare against ICU UCM} -body { + ucmConverttoMismatches iso8859-9 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0024 24 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007E 7E 007F 7F 0080 80 0081 81 0082 82 0083 83 0084 84 0085 85 0086 86 0087 87 0088 88 0089 89 008A 8A 008B 8B 008C 8C 008D 8D 008E 8E 008F 8F 0090 90 0091 91 0092 92 0093 93 0094 94 0095 95 0096 96 0097 97 0098 98 0099 99 009A 9A 009B 9B 009C 9C 009D 9D 009E 9E 009F 9F 00A0 A0 00A1 A1 00A2 A2 00A3 A3 00A4 A4 00A5 A5 00A6 A6 00A7 A7 00A8 A8 00A9 A9 00AA AA 00AB AB 00AC AC 00AD AD 00AE AE 00AF AF 00B0 B0 00B1 B1 00B2 B2 00B3 B3 00B4 B4 00B5 B5 00B6 B6 00B7 B7 00B8 B8 00B9 B9 00BA BA 00BB BB 00BC BC 00BD BD 00BE BE 00BF BF 00C0 C0 00C1 C1 00C2 C2 00C3 C3 00C4 C4 00C5 C5 00C6 C6 00C7 C7 00C8 C8 00C9 C9 00CA CA 00CB CB 00CC CC 00CD CD 00CE CE 00CF CF 00D1 D1 00D2 D2 00D3 D3 00D4 D4 00D5 D5 00D6 D6 00D7 D7 00D8 D8 00D9 D9 00DA DA 00DB DB 00DC DC 00DF DF 00E0 E0 00E1 E1 00E2 E2 00E3 E3 00E4 E4 00E5 E5 00E6 E6 00E7 E7 00E8 E8 00E9 E9 00EA EA 00EB EB 00EC EC 00ED ED 00EE EE 00EF EF 00F1 F1 00F2 F2 00F3 F3 00F4 F4 00F5 F5 00F6 F6 00F7 F7 00F8 F8 00F9 F9 00FA FA 00FB FB 00FC FC 00FF FF 011E D0 011F F0 0130 DD 0131 FD 015E DE 015F FE} +} -result {} + +# iso8859-9 - invalid byte sequences +lappend encInvalidBytes {*}{ +}; # iso8859-9 + +# iso8859-9 - invalid byte sequences +lappend encUnencodableStrings {*}{ + iso8859-9 \U000000D0 tcl8 1A -1 {} {} + iso8859-9 \U000000D0 replace 1A -1 {} {} + iso8859-9 \U000000D0 strict {} 0 {} {} + iso8859-9 \U00000400 tcl8 1A -1 {} {} + iso8859-9 \U00000400 replace 1A -1 {} {} + iso8859-9 \U00000400 strict {} 0 {} {} + iso8859-9 \U0000D800 tcl8 1A -1 {} {} + iso8859-9 \U0000D800 replace 1A -1 {} {} + iso8859-9 \U0000D800 strict {} 0 {} {} + iso8859-9 \U0000DC00 tcl8 1A -1 {} {} + iso8859-9 \U0000DC00 replace 1A -1 {} {} + iso8859-9 \U0000DC00 strict {} 0 {} {} + iso8859-9 \U00010000 tcl8 1A -1 {} {} + iso8859-9 \U00010000 replace 1A -1 {} {} + iso8859-9 \U00010000 strict {} 0 {} {} + iso8859-9 \U0010FFFF tcl8 1A -1 {} {} + iso8859-9 \U0010FFFF replace 1A -1 {} {} + iso8859-9 \U0010FFFF strict {} 0 {} {} +}; # iso8859-9 + +# +# iso8859-10 (generated from glibc-ISO_8859_10-2.1.2) + +test encoding-convertfrom-ucmCompare-iso8859-10 {Compare against ICU UCM} -body { + ucmConvertfromMismatches iso8859-10 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0024 24 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007E 7E 007F 7F 0080 80 0081 81 0082 82 0083 83 0084 84 0085 85 0086 86 0087 87 0088 88 0089 89 008A 8A 008B 8B 008C 8C 008D 8D 008E 8E 008F 8F 0090 90 0091 91 0092 92 0093 93 0094 94 0095 95 0096 96 0097 97 0098 98 0099 99 009A 9A 009B 9B 009C 9C 009D 9D 009E 9E 009F 9F 00A0 A0 00A7 A7 00AD AD 00B0 B0 00B7 B7 00C1 C1 00C2 C2 00C3 C3 00C4 C4 00C5 C5 00C6 C6 00C9 C9 00CB CB 00CD CD 00CE CE 00CF CF 00D0 D0 00D3 D3 00D4 D4 00D5 D5 00D6 D6 00D8 D8 00DA DA 00DB DB 00DC DC 00DD DD 00DE DE 00DF DF 00E1 E1 00E2 E2 00E3 E3 00E4 E4 00E5 E5 00E6 E6 00E9 E9 00EB EB 00ED ED 00EE EE 00EF EF 00F0 F0 00F3 F3 00F4 F4 00F5 F5 00F6 F6 00F8 F8 00FA FA 00FB FB 00FC FC 00FD FD 00FE FE 0100 C0 0101 E0 0104 A1 0105 B1 010C C8 010D E8 0110 A9 0111 B9 0112 A2 0113 B2 0116 CC 0117 EC 0118 CA 0119 EA 0122 A3 0123 B3 0128 A5 0129 B5 012A A4 012B B4 012E C7 012F E7 0136 A6 0137 B6 0138 FF 013B A8 013C B8 0145 D1 0146 F1 014A AF 014B BF 014C D2 014D F2 0160 AA 0161 BA 0166 AB 0167 BB 0168 D7 0169 F7 016A AE 016B BE 0172 D9 0173 F9 017D AC 017E BC 2015 BD} +} -result {} + +test encoding-convertto-ucmCompare-iso8859-10 {Compare against ICU UCM} -body { + ucmConverttoMismatches iso8859-10 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0024 24 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007E 7E 007F 7F 0080 80 0081 81 0082 82 0083 83 0084 84 0085 85 0086 86 0087 87 0088 88 0089 89 008A 8A 008B 8B 008C 8C 008D 8D 008E 8E 008F 8F 0090 90 0091 91 0092 92 0093 93 0094 94 0095 95 0096 96 0097 97 0098 98 0099 99 009A 9A 009B 9B 009C 9C 009D 9D 009E 9E 009F 9F 00A0 A0 00A7 A7 00AD AD 00B0 B0 00B7 B7 00C1 C1 00C2 C2 00C3 C3 00C4 C4 00C5 C5 00C6 C6 00C9 C9 00CB CB 00CD CD 00CE CE 00CF CF 00D0 D0 00D3 D3 00D4 D4 00D5 D5 00D6 D6 00D8 D8 00DA DA 00DB DB 00DC DC 00DD DD 00DE DE 00DF DF 00E1 E1 00E2 E2 00E3 E3 00E4 E4 00E5 E5 00E6 E6 00E9 E9 00EB EB 00ED ED 00EE EE 00EF EF 00F0 F0 00F3 F3 00F4 F4 00F5 F5 00F6 F6 00F8 F8 00FA FA 00FB FB 00FC FC 00FD FD 00FE FE 0100 C0 0101 E0 0104 A1 0105 B1 010C C8 010D E8 0110 A9 0111 B9 0112 A2 0113 B2 0116 CC 0117 EC 0118 CA 0119 EA 0122 A3 0123 B3 0128 A5 0129 B5 012A A4 012B B4 012E C7 012F E7 0136 A6 0137 B6 0138 FF 013B A8 013C B8 0145 D1 0146 F1 014A AF 014B BF 014C D2 014D F2 0160 AA 0161 BA 0166 AB 0167 BB 0168 D7 0169 F7 016A AE 016B BE 0172 D9 0173 F9 017D AC 017E BC 2015 BD} +} -result {} + +# iso8859-10 - invalid byte sequences +lappend encInvalidBytes {*}{ +}; # iso8859-10 + +# iso8859-10 - invalid byte sequences +lappend encUnencodableStrings {*}{ + iso8859-10 \U000000A1 tcl8 1A -1 {} {} + iso8859-10 \U000000A1 replace 1A -1 {} {} + iso8859-10 \U000000A1 strict {} 0 {} {} + iso8859-10 \U00000400 tcl8 1A -1 {} {} + iso8859-10 \U00000400 replace 1A -1 {} {} + iso8859-10 \U00000400 strict {} 0 {} {} + iso8859-10 \U0000D800 tcl8 1A -1 {} {} + iso8859-10 \U0000D800 replace 1A -1 {} {} + iso8859-10 \U0000D800 strict {} 0 {} {} + iso8859-10 \U0000DC00 tcl8 1A -1 {} {} + iso8859-10 \U0000DC00 replace 1A -1 {} {} + iso8859-10 \U0000DC00 strict {} 0 {} {} + iso8859-10 \U00010000 tcl8 1A -1 {} {} + iso8859-10 \U00010000 replace 1A -1 {} {} + iso8859-10 \U00010000 strict {} 0 {} {} + iso8859-10 \U0010FFFF tcl8 1A -1 {} {} + iso8859-10 \U0010FFFF replace 1A -1 {} {} + iso8859-10 \U0010FFFF strict {} 0 {} {} +}; # iso8859-10 + +# +# iso8859-11 (generated from glibc-ISO_8859_11-2.1.2) + +test encoding-convertfrom-ucmCompare-iso8859-11 {Compare against ICU UCM} -body { + ucmConvertfromMismatches iso8859-11 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0024 24 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007E 7E 007F 7F 0080 80 0081 81 0082 82 0083 83 0084 84 0085 85 0086 86 0087 87 0088 88 0089 89 008A 8A 008B 8B 008C 8C 008D 8D 008E 8E 008F 8F 0090 90 0091 91 0092 92 0093 93 0094 94 0095 95 0096 96 0097 97 0098 98 0099 99 009A 9A 009B 9B 009C 9C 009D 9D 009E 9E 009F 9F 00A0 A0 0E01 A1 0E02 A2 0E03 A3 0E04 A4 0E05 A5 0E06 A6 0E07 A7 0E08 A8 0E09 A9 0E0A AA 0E0B AB 0E0C AC 0E0D AD 0E0E AE 0E0F AF 0E10 B0 0E11 B1 0E12 B2 0E13 B3 0E14 B4 0E15 B5 0E16 B6 0E17 B7 0E18 B8 0E19 B9 0E1A BA 0E1B BB 0E1C BC 0E1D BD 0E1E BE 0E1F BF 0E20 C0 0E21 C1 0E22 C2 0E23 C3 0E24 C4 0E25 C5 0E26 C6 0E27 C7 0E28 C8 0E29 C9 0E2A CA 0E2B CB 0E2C CC 0E2D CD 0E2E CE 0E2F CF 0E30 D0 0E31 D1 0E32 D2 0E33 D3 0E34 D4 0E35 D5 0E36 D6 0E37 D7 0E38 D8 0E39 D9 0E3A DA 0E3F DF 0E40 E0 0E41 E1 0E42 E2 0E43 E3 0E44 E4 0E45 E5 0E46 E6 0E47 E7 0E48 E8 0E49 E9 0E4A EA 0E4B EB 0E4C EC 0E4D ED 0E4E EE 0E4F EF 0E50 F0 0E51 F1 0E52 F2 0E53 F3 0E54 F4 0E55 F5 0E56 F6 0E57 F7 0E58 F8 0E59 F9 0E5A FA 0E5B FB} +} -result {} + +test encoding-convertto-ucmCompare-iso8859-11 {Compare against ICU UCM} -body { + ucmConverttoMismatches iso8859-11 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0024 24 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007E 7E 007F 7F 0080 80 0081 81 0082 82 0083 83 0084 84 0085 85 0086 86 0087 87 0088 88 0089 89 008A 8A 008B 8B 008C 8C 008D 8D 008E 8E 008F 8F 0090 90 0091 91 0092 92 0093 93 0094 94 0095 95 0096 96 0097 97 0098 98 0099 99 009A 9A 009B 9B 009C 9C 009D 9D 009E 9E 009F 9F 00A0 A0 0E01 A1 0E02 A2 0E03 A3 0E04 A4 0E05 A5 0E06 A6 0E07 A7 0E08 A8 0E09 A9 0E0A AA 0E0B AB 0E0C AC 0E0D AD 0E0E AE 0E0F AF 0E10 B0 0E11 B1 0E12 B2 0E13 B3 0E14 B4 0E15 B5 0E16 B6 0E17 B7 0E18 B8 0E19 B9 0E1A BA 0E1B BB 0E1C BC 0E1D BD 0E1E BE 0E1F BF 0E20 C0 0E21 C1 0E22 C2 0E23 C3 0E24 C4 0E25 C5 0E26 C6 0E27 C7 0E28 C8 0E29 C9 0E2A CA 0E2B CB 0E2C CC 0E2D CD 0E2E CE 0E2F CF 0E30 D0 0E31 D1 0E32 D2 0E33 D3 0E34 D4 0E35 D5 0E36 D6 0E37 D7 0E38 D8 0E39 D9 0E3A DA 0E3F DF 0E40 E0 0E41 E1 0E42 E2 0E43 E3 0E44 E4 0E45 E5 0E46 E6 0E47 E7 0E48 E8 0E49 E9 0E4A EA 0E4B EB 0E4C EC 0E4D ED 0E4E EE 0E4F EF 0E50 F0 0E51 F1 0E52 F2 0E53 F3 0E54 F4 0E55 F5 0E56 F6 0E57 F7 0E58 F8 0E59 F9 0E5A FA 0E5B FB} +} -result {} + +# iso8859-11 - invalid byte sequences +lappend encInvalidBytes {*}{ + iso8859-11 DB tcl8 \U000000DB -1 {} {} + iso8859-11 DB replace \uFFFD -1 {} {} + iso8859-11 DB strict {} 0 {} {} + iso8859-11 DC tcl8 \U000000DC -1 {} {} + iso8859-11 DC replace \uFFFD -1 {} {} + iso8859-11 DC strict {} 0 {} {} + iso8859-11 DD tcl8 \U000000DD -1 {} {} + iso8859-11 DD replace \uFFFD -1 {} {} + iso8859-11 DD strict {} 0 {} {} + iso8859-11 DE tcl8 \U000000DE -1 {} {} + iso8859-11 DE replace \uFFFD -1 {} {} + iso8859-11 DE strict {} 0 {} {} + iso8859-11 FC tcl8 \U000000FC -1 {} {} + iso8859-11 FC replace \uFFFD -1 {} {} + iso8859-11 FC strict {} 0 {} {} + iso8859-11 FD tcl8 \U000000FD -1 {} {} + iso8859-11 FD replace \uFFFD -1 {} {} + iso8859-11 FD strict {} 0 {} {} + iso8859-11 FE tcl8 \U000000FE -1 {} {} + iso8859-11 FE replace \uFFFD -1 {} {} + iso8859-11 FE strict {} 0 {} {} + iso8859-11 FF tcl8 \U000000FF -1 {} {} + iso8859-11 FF replace \uFFFD -1 {} {} + iso8859-11 FF strict {} 0 {} {} +}; # iso8859-11 + +# iso8859-11 - invalid byte sequences +lappend encUnencodableStrings {*}{ + iso8859-11 \U000000A1 tcl8 1A -1 {} {} + iso8859-11 \U000000A1 replace 1A -1 {} {} + iso8859-11 \U000000A1 strict {} 0 {} {} + iso8859-11 \U00000400 tcl8 1A -1 {} {} + iso8859-11 \U00000400 replace 1A -1 {} {} + iso8859-11 \U00000400 strict {} 0 {} {} + iso8859-11 \U0000D800 tcl8 1A -1 {} {} + iso8859-11 \U0000D800 replace 1A -1 {} {} + iso8859-11 \U0000D800 strict {} 0 {} {} + iso8859-11 \U0000DC00 tcl8 1A -1 {} {} + iso8859-11 \U0000DC00 replace 1A -1 {} {} + iso8859-11 \U0000DC00 strict {} 0 {} {} + iso8859-11 \U00010000 tcl8 1A -1 {} {} + iso8859-11 \U00010000 replace 1A -1 {} {} + iso8859-11 \U00010000 strict {} 0 {} {} + iso8859-11 \U0010FFFF tcl8 1A -1 {} {} + iso8859-11 \U0010FFFF replace 1A -1 {} {} + iso8859-11 \U0010FFFF strict {} 0 {} {} +}; # iso8859-11 + +# +# iso8859-13 (generated from glibc-ISO_8859_13-2.3.3) + +test encoding-convertfrom-ucmCompare-iso8859-13 {Compare against ICU UCM} -body { + ucmConvertfromMismatches iso8859-13 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0024 24 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007E 7E 007F 7F 0080 80 0081 81 0082 82 0083 83 0084 84 0085 85 0086 86 0087 87 0088 88 0089 89 008A 8A 008B 8B 008C 8C 008D 8D 008E 8E 008F 8F 0090 90 0091 91 0092 92 0093 93 0094 94 0095 95 0096 96 0097 97 0098 98 0099 99 009A 9A 009B 9B 009C 9C 009D 9D 009E 9E 009F 9F 00A0 A0 00A2 A2 00A3 A3 00A4 A4 00A6 A6 00A7 A7 00A9 A9 00AB AB 00AC AC 00AD AD 00AE AE 00B0 B0 00B1 B1 00B2 B2 00B3 B3 00B5 B5 00B6 B6 00B7 B7 00B9 B9 00BB BB 00BC BC 00BD BD 00BE BE 00C4 C4 00C5 C5 00C6 AF 00C9 C9 00D3 D3 00D5 D5 00D6 D6 00D7 D7 00D8 A8 00DC DC 00DF DF 00E4 E4 00E5 E5 00E6 BF 00E9 E9 00F3 F3 00F5 F5 00F6 F6 00F7 F7 00F8 B8 00FC FC 0100 C2 0101 E2 0104 C0 0105 E0 0106 C3 0107 E3 010C C8 010D E8 0112 C7 0113 E7 0116 CB 0117 EB 0118 C6 0119 E6 0122 CC 0123 EC 012A CE 012B EE 012E C1 012F E1 0136 CD 0137 ED 013B CF 013C EF 0141 D9 0142 F9 0143 D1 0144 F1 0145 D2 0146 F2 014C D4 014D F4 0156 AA 0157 BA 015A DA 015B FA 0160 D0 0161 F0 016A DB 016B FB 0172 D8 0173 F8 0179 CA 017A EA 017B DD 017C FD 017D DE 017E FE 2019 FF 201C B4 201D A1 201E A5} +} -result {} + +test encoding-convertto-ucmCompare-iso8859-13 {Compare against ICU UCM} -body { + ucmConverttoMismatches iso8859-13 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0024 24 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007E 7E 007F 7F 0080 80 0081 81 0082 82 0083 83 0084 84 0085 85 0086 86 0087 87 0088 88 0089 89 008A 8A 008B 8B 008C 8C 008D 8D 008E 8E 008F 8F 0090 90 0091 91 0092 92 0093 93 0094 94 0095 95 0096 96 0097 97 0098 98 0099 99 009A 9A 009B 9B 009C 9C 009D 9D 009E 9E 009F 9F 00A0 A0 00A2 A2 00A3 A3 00A4 A4 00A6 A6 00A7 A7 00A9 A9 00AB AB 00AC AC 00AD AD 00AE AE 00B0 B0 00B1 B1 00B2 B2 00B3 B3 00B5 B5 00B6 B6 00B7 B7 00B9 B9 00BB BB 00BC BC 00BD BD 00BE BE 00C4 C4 00C5 C5 00C6 AF 00C9 C9 00D3 D3 00D5 D5 00D6 D6 00D7 D7 00D8 A8 00DC DC 00DF DF 00E4 E4 00E5 E5 00E6 BF 00E9 E9 00F3 F3 00F5 F5 00F6 F6 00F7 F7 00F8 B8 00FC FC 0100 C2 0101 E2 0104 C0 0105 E0 0106 C3 0107 E3 010C C8 010D E8 0112 C7 0113 E7 0116 CB 0117 EB 0118 C6 0119 E6 0122 CC 0123 EC 012A CE 012B EE 012E C1 012F E1 0136 CD 0137 ED 013B CF 013C EF 0141 D9 0142 F9 0143 D1 0144 F1 0145 D2 0146 F2 014C D4 014D F4 0156 AA 0157 BA 015A DA 015B FA 0160 D0 0161 F0 016A DB 016B FB 0172 D8 0173 F8 0179 CA 017A EA 017B DD 017C FD 017D DE 017E FE 2019 FF 201C B4 201D A1 201E A5} +} -result {} + +# iso8859-13 - invalid byte sequences +lappend encInvalidBytes {*}{ +}; # iso8859-13 + +# iso8859-13 - invalid byte sequences +lappend encUnencodableStrings {*}{ + iso8859-13 \U000000A1 tcl8 1A -1 {} {} + iso8859-13 \U000000A1 replace 1A -1 {} {} + iso8859-13 \U000000A1 strict {} 0 {} {} + iso8859-13 \U00000400 tcl8 1A -1 {} {} + iso8859-13 \U00000400 replace 1A -1 {} {} + iso8859-13 \U00000400 strict {} 0 {} {} + iso8859-13 \U0000D800 tcl8 1A -1 {} {} + iso8859-13 \U0000D800 replace 1A -1 {} {} + iso8859-13 \U0000D800 strict {} 0 {} {} + iso8859-13 \U0000DC00 tcl8 1A -1 {} {} + iso8859-13 \U0000DC00 replace 1A -1 {} {} + iso8859-13 \U0000DC00 strict {} 0 {} {} + iso8859-13 \U00010000 tcl8 1A -1 {} {} + iso8859-13 \U00010000 replace 1A -1 {} {} + iso8859-13 \U00010000 strict {} 0 {} {} + iso8859-13 \U0010FFFF tcl8 1A -1 {} {} + iso8859-13 \U0010FFFF replace 1A -1 {} {} + iso8859-13 \U0010FFFF strict {} 0 {} {} +}; # iso8859-13 + +# +# iso8859-14 (generated from glibc-ISO_8859_14-2.1.2) + +test encoding-convertfrom-ucmCompare-iso8859-14 {Compare against ICU UCM} -body { + ucmConvertfromMismatches iso8859-14 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0024 24 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007E 7E 007F 7F 0080 80 0081 81 0082 82 0083 83 0084 84 0085 85 0086 86 0087 87 0088 88 0089 89 008A 8A 008B 8B 008C 8C 008D 8D 008E 8E 008F 8F 0090 90 0091 91 0092 92 0093 93 0094 94 0095 95 0096 96 0097 97 0098 98 0099 99 009A 9A 009B 9B 009C 9C 009D 9D 009E 9E 009F 9F 00A0 A0 00A3 A3 00A7 A7 00A9 A9 00AD AD 00AE AE 00B6 B6 00C0 C0 00C1 C1 00C2 C2 00C3 C3 00C4 C4 00C5 C5 00C6 C6 00C7 C7 00C8 C8 00C9 C9 00CA CA 00CB CB 00CC CC 00CD CD 00CE CE 00CF CF 00D1 D1 00D2 D2 00D3 D3 00D4 D4 00D5 D5 00D6 D6 00D8 D8 00D9 D9 00DA DA 00DB DB 00DC DC 00DD DD 00DF DF 00E0 E0 00E1 E1 00E2 E2 00E3 E3 00E4 E4 00E5 E5 00E6 E6 00E7 E7 00E8 E8 00E9 E9 00EA EA 00EB EB 00EC EC 00ED ED 00EE EE 00EF EF 00F1 F1 00F2 F2 00F3 F3 00F4 F4 00F5 F5 00F6 F6 00F8 F8 00F9 F9 00FA FA 00FB FB 00FC FC 00FD FD 00FF FF 010A A4 010B A5 0120 B2 0121 B3 0174 D0 0175 F0 0176 DE 0177 FE 0178 AF 1E02 A1 1E03 A2 1E0A A6 1E0B AB 1E1E B0 1E1F B1 1E40 B4 1E41 B5 1E56 B7 1E57 B9 1E60 BB 1E61 BF 1E6A D7 1E6B F7 1E80 A8 1E81 B8 1E82 AA 1E83 BA 1E84 BD 1E85 BE 1EF2 AC 1EF3 BC} +} -result {} + +test encoding-convertto-ucmCompare-iso8859-14 {Compare against ICU UCM} -body { + ucmConverttoMismatches iso8859-14 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0024 24 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007E 7E 007F 7F 0080 80 0081 81 0082 82 0083 83 0084 84 0085 85 0086 86 0087 87 0088 88 0089 89 008A 8A 008B 8B 008C 8C 008D 8D 008E 8E 008F 8F 0090 90 0091 91 0092 92 0093 93 0094 94 0095 95 0096 96 0097 97 0098 98 0099 99 009A 9A 009B 9B 009C 9C 009D 9D 009E 9E 009F 9F 00A0 A0 00A3 A3 00A7 A7 00A9 A9 00AD AD 00AE AE 00B6 B6 00C0 C0 00C1 C1 00C2 C2 00C3 C3 00C4 C4 00C5 C5 00C6 C6 00C7 C7 00C8 C8 00C9 C9 00CA CA 00CB CB 00CC CC 00CD CD 00CE CE 00CF CF 00D1 D1 00D2 D2 00D3 D3 00D4 D4 00D5 D5 00D6 D6 00D8 D8 00D9 D9 00DA DA 00DB DB 00DC DC 00DD DD 00DF DF 00E0 E0 00E1 E1 00E2 E2 00E3 E3 00E4 E4 00E5 E5 00E6 E6 00E7 E7 00E8 E8 00E9 E9 00EA EA 00EB EB 00EC EC 00ED ED 00EE EE 00EF EF 00F1 F1 00F2 F2 00F3 F3 00F4 F4 00F5 F5 00F6 F6 00F8 F8 00F9 F9 00FA FA 00FB FB 00FC FC 00FD FD 00FF FF 010A A4 010B A5 0120 B2 0121 B3 0174 D0 0175 F0 0176 DE 0177 FE 0178 AF 1E02 A1 1E03 A2 1E0A A6 1E0B AB 1E1E B0 1E1F B1 1E40 B4 1E41 B5 1E56 B7 1E57 B9 1E60 BB 1E61 BF 1E6A D7 1E6B F7 1E80 A8 1E81 B8 1E82 AA 1E83 BA 1E84 BD 1E85 BE 1EF2 AC 1EF3 BC} +} -result {} + +# iso8859-14 - invalid byte sequences +lappend encInvalidBytes {*}{ +}; # iso8859-14 + +# iso8859-14 - invalid byte sequences +lappend encUnencodableStrings {*}{ + iso8859-14 \U000000A1 tcl8 1A -1 {} {} + iso8859-14 \U000000A1 replace 1A -1 {} {} + iso8859-14 \U000000A1 strict {} 0 {} {} + iso8859-14 \U00000400 tcl8 1A -1 {} {} + iso8859-14 \U00000400 replace 1A -1 {} {} + iso8859-14 \U00000400 strict {} 0 {} {} + iso8859-14 \U0000D800 tcl8 1A -1 {} {} + iso8859-14 \U0000D800 replace 1A -1 {} {} + iso8859-14 \U0000D800 strict {} 0 {} {} + iso8859-14 \U0000DC00 tcl8 1A -1 {} {} + iso8859-14 \U0000DC00 replace 1A -1 {} {} + iso8859-14 \U0000DC00 strict {} 0 {} {} + iso8859-14 \U00010000 tcl8 1A -1 {} {} + iso8859-14 \U00010000 replace 1A -1 {} {} + iso8859-14 \U00010000 strict {} 0 {} {} + iso8859-14 \U0010FFFF tcl8 1A -1 {} {} + iso8859-14 \U0010FFFF replace 1A -1 {} {} + iso8859-14 \U0010FFFF strict {} 0 {} {} +}; # iso8859-14 + +# +# iso8859-15 (generated from glibc-ISO_8859_15-2.1.2) + +test encoding-convertfrom-ucmCompare-iso8859-15 {Compare against ICU UCM} -body { + ucmConvertfromMismatches iso8859-15 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0024 24 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007E 7E 007F 7F 0080 80 0081 81 0082 82 0083 83 0084 84 0085 85 0086 86 0087 87 0088 88 0089 89 008A 8A 008B 8B 008C 8C 008D 8D 008E 8E 008F 8F 0090 90 0091 91 0092 92 0093 93 0094 94 0095 95 0096 96 0097 97 0098 98 0099 99 009A 9A 009B 9B 009C 9C 009D 9D 009E 9E 009F 9F 00A0 A0 00A1 A1 00A2 A2 00A3 A3 00A5 A5 00A7 A7 00A9 A9 00AA AA 00AB AB 00AC AC 00AD AD 00AE AE 00AF AF 00B0 B0 00B1 B1 00B2 B2 00B3 B3 00B5 B5 00B6 B6 00B7 B7 00B9 B9 00BA BA 00BB BB 00BF BF 00C0 C0 00C1 C1 00C2 C2 00C3 C3 00C4 C4 00C5 C5 00C6 C6 00C7 C7 00C8 C8 00C9 C9 00CA CA 00CB CB 00CC CC 00CD CD 00CE CE 00CF CF 00D0 D0 00D1 D1 00D2 D2 00D3 D3 00D4 D4 00D5 D5 00D6 D6 00D7 D7 00D8 D8 00D9 D9 00DA DA 00DB DB 00DC DC 00DD DD 00DE DE 00DF DF 00E0 E0 00E1 E1 00E2 E2 00E3 E3 00E4 E4 00E5 E5 00E6 E6 00E7 E7 00E8 E8 00E9 E9 00EA EA 00EB EB 00EC EC 00ED ED 00EE EE 00EF EF 00F0 F0 00F1 F1 00F2 F2 00F3 F3 00F4 F4 00F5 F5 00F6 F6 00F7 F7 00F8 F8 00F9 F9 00FA FA 00FB FB 00FC FC 00FD FD 00FE FE 00FF FF 0152 BC 0153 BD 0160 A6 0161 A8 0178 BE 017D B4 017E B8 20AC A4} +} -result {} + +test encoding-convertto-ucmCompare-iso8859-15 {Compare against ICU UCM} -body { + ucmConverttoMismatches iso8859-15 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0024 24 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007E 7E 007F 7F 0080 80 0081 81 0082 82 0083 83 0084 84 0085 85 0086 86 0087 87 0088 88 0089 89 008A 8A 008B 8B 008C 8C 008D 8D 008E 8E 008F 8F 0090 90 0091 91 0092 92 0093 93 0094 94 0095 95 0096 96 0097 97 0098 98 0099 99 009A 9A 009B 9B 009C 9C 009D 9D 009E 9E 009F 9F 00A0 A0 00A1 A1 00A2 A2 00A3 A3 00A5 A5 00A7 A7 00A9 A9 00AA AA 00AB AB 00AC AC 00AD AD 00AE AE 00AF AF 00B0 B0 00B1 B1 00B2 B2 00B3 B3 00B5 B5 00B6 B6 00B7 B7 00B9 B9 00BA BA 00BB BB 00BF BF 00C0 C0 00C1 C1 00C2 C2 00C3 C3 00C4 C4 00C5 C5 00C6 C6 00C7 C7 00C8 C8 00C9 C9 00CA CA 00CB CB 00CC CC 00CD CD 00CE CE 00CF CF 00D0 D0 00D1 D1 00D2 D2 00D3 D3 00D4 D4 00D5 D5 00D6 D6 00D7 D7 00D8 D8 00D9 D9 00DA DA 00DB DB 00DC DC 00DD DD 00DE DE 00DF DF 00E0 E0 00E1 E1 00E2 E2 00E3 E3 00E4 E4 00E5 E5 00E6 E6 00E7 E7 00E8 E8 00E9 E9 00EA EA 00EB EB 00EC EC 00ED ED 00EE EE 00EF EF 00F0 F0 00F1 F1 00F2 F2 00F3 F3 00F4 F4 00F5 F5 00F6 F6 00F7 F7 00F8 F8 00F9 F9 00FA FA 00FB FB 00FC FC 00FD FD 00FE FE 00FF FF 0152 BC 0153 BD 0160 A6 0161 A8 0178 BE 017D B4 017E B8 20AC A4} +} -result {} + +# iso8859-15 - invalid byte sequences +lappend encInvalidBytes {*}{ +}; # iso8859-15 + +# iso8859-15 - invalid byte sequences +lappend encUnencodableStrings {*}{ + iso8859-15 \U000000A4 tcl8 1A -1 {} {} + iso8859-15 \U000000A4 replace 1A -1 {} {} + iso8859-15 \U000000A4 strict {} 0 {} {} + iso8859-15 \U00000400 tcl8 1A -1 {} {} + iso8859-15 \U00000400 replace 1A -1 {} {} + iso8859-15 \U00000400 strict {} 0 {} {} + iso8859-15 \U0000D800 tcl8 1A -1 {} {} + iso8859-15 \U0000D800 replace 1A -1 {} {} + iso8859-15 \U0000D800 strict {} 0 {} {} + iso8859-15 \U0000DC00 tcl8 1A -1 {} {} + iso8859-15 \U0000DC00 replace 1A -1 {} {} + iso8859-15 \U0000DC00 strict {} 0 {} {} + iso8859-15 \U00010000 tcl8 1A -1 {} {} + iso8859-15 \U00010000 replace 1A -1 {} {} + iso8859-15 \U00010000 strict {} 0 {} {} + iso8859-15 \U0010FFFF tcl8 1A -1 {} {} + iso8859-15 \U0010FFFF replace 1A -1 {} {} + iso8859-15 \U0010FFFF strict {} 0 {} {} +}; # iso8859-15 + +# +# iso8859-16 (generated from glibc-ISO_8859_16-2.3.3) + +test encoding-convertfrom-ucmCompare-iso8859-16 {Compare against ICU UCM} -body { + ucmConvertfromMismatches iso8859-16 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0024 24 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007E 7E 007F 7F 0080 80 0081 81 0082 82 0083 83 0084 84 0085 85 0086 86 0087 87 0088 88 0089 89 008A 8A 008B 8B 008C 8C 008D 8D 008E 8E 008F 8F 0090 90 0091 91 0092 92 0093 93 0094 94 0095 95 0096 96 0097 97 0098 98 0099 99 009A 9A 009B 9B 009C 9C 009D 9D 009E 9E 009F 9F 00A0 A0 00A7 A7 00A9 A9 00AB AB 00AD AD 00B0 B0 00B1 B1 00B6 B6 00B7 B7 00BB BB 00C0 C0 00C1 C1 00C2 C2 00C4 C4 00C6 C6 00C7 C7 00C8 C8 00C9 C9 00CA CA 00CB CB 00CC CC 00CD CD 00CE CE 00CF CF 00D2 D2 00D3 D3 00D4 D4 00D6 D6 00D9 D9 00DA DA 00DB DB 00DC DC 00DF DF 00E0 E0 00E1 E1 00E2 E2 00E4 E4 00E6 E6 00E7 E7 00E8 E8 00E9 E9 00EA EA 00EB EB 00EC EC 00ED ED 00EE EE 00EF EF 00F2 F2 00F3 F3 00F4 F4 00F6 F6 00F9 F9 00FA FA 00FB FB 00FC FC 00FF FF 0102 C3 0103 E3 0104 A1 0105 A2 0106 C5 0107 E5 010C B2 010D B9 0110 D0 0111 F0 0118 DD 0119 FD 0141 A3 0142 B3 0143 D1 0144 F1 0150 D5 0151 F5 0152 BC 0153 BD 015A D7 015B F7 0160 A6 0161 A8 0170 D8 0171 F8 0178 BE 0179 AC 017A AE 017B AF 017C BF 017D B4 017E B8 0218 AA 0219 BA 021A DE 021B FE 201D B5 201E A5 20AC A4} +} -result {} + +test encoding-convertto-ucmCompare-iso8859-16 {Compare against ICU UCM} -body { + ucmConverttoMismatches iso8859-16 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0024 24 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007E 7E 007F 7F 0080 80 0081 81 0082 82 0083 83 0084 84 0085 85 0086 86 0087 87 0088 88 0089 89 008A 8A 008B 8B 008C 8C 008D 8D 008E 8E 008F 8F 0090 90 0091 91 0092 92 0093 93 0094 94 0095 95 0096 96 0097 97 0098 98 0099 99 009A 9A 009B 9B 009C 9C 009D 9D 009E 9E 009F 9F 00A0 A0 00A7 A7 00A9 A9 00AB AB 00AD AD 00B0 B0 00B1 B1 00B6 B6 00B7 B7 00BB BB 00C0 C0 00C1 C1 00C2 C2 00C4 C4 00C6 C6 00C7 C7 00C8 C8 00C9 C9 00CA CA 00CB CB 00CC CC 00CD CD 00CE CE 00CF CF 00D2 D2 00D3 D3 00D4 D4 00D6 D6 00D9 D9 00DA DA 00DB DB 00DC DC 00DF DF 00E0 E0 00E1 E1 00E2 E2 00E4 E4 00E6 E6 00E7 E7 00E8 E8 00E9 E9 00EA EA 00EB EB 00EC EC 00ED ED 00EE EE 00EF EF 00F2 F2 00F3 F3 00F4 F4 00F6 F6 00F9 F9 00FA FA 00FB FB 00FC FC 00FF FF 0102 C3 0103 E3 0104 A1 0105 A2 0106 C5 0107 E5 010C B2 010D B9 0110 D0 0111 F0 0118 DD 0119 FD 0141 A3 0142 B3 0143 D1 0144 F1 0150 D5 0151 F5 0152 BC 0153 BD 015A D7 015B F7 0160 A6 0161 A8 0170 D8 0171 F8 0178 BE 0179 AC 017A AE 017B AF 017C BF 017D B4 017E B8 0218 AA 0219 BA 021A DE 021B FE 201D B5 201E A5 20AC A4} +} -result {} + +# iso8859-16 - invalid byte sequences +lappend encInvalidBytes {*}{ +}; # iso8859-16 + +# iso8859-16 - invalid byte sequences +lappend encUnencodableStrings {*}{ + iso8859-16 \U000000A1 tcl8 1A -1 {} {} + iso8859-16 \U000000A1 replace 1A -1 {} {} + iso8859-16 \U000000A1 strict {} 0 {} {} + iso8859-16 \U00000400 tcl8 1A -1 {} {} + iso8859-16 \U00000400 replace 1A -1 {} {} + iso8859-16 \U00000400 strict {} 0 {} {} + iso8859-16 \U0000D800 tcl8 1A -1 {} {} + iso8859-16 \U0000D800 replace 1A -1 {} {} + iso8859-16 \U0000D800 strict {} 0 {} {} + iso8859-16 \U0000DC00 tcl8 1A -1 {} {} + iso8859-16 \U0000DC00 replace 1A -1 {} {} + iso8859-16 \U0000DC00 strict {} 0 {} {} + iso8859-16 \U00010000 tcl8 1A -1 {} {} + iso8859-16 \U00010000 replace 1A -1 {} {} + iso8859-16 \U00010000 strict {} 0 {} {} + iso8859-16 \U0010FFFF tcl8 1A -1 {} {} + iso8859-16 \U0010FFFF replace 1A -1 {} {} + iso8859-16 \U0010FFFF strict {} 0 {} {} +}; # iso8859-16 diff --git a/tools/ucm2tests.tcl b/tools/ucm2tests.tcl index e971631..dc878ef 100644 --- a/tools/ucm2tests.tcl +++ b/tools/ucm2tests.tcl @@ -37,14 +37,27 @@ namespace eval ucm { iso8859-9 glibc-ISO_8859_9-2.1.2 iso8859-10 glibc-ISO_8859_10-2.1.2 iso8859-11 glibc-ISO_8859_11-2.1.2 - iso8859-13 glibc-ISO_8859_13-2.1.2 + iso8859-13 glibc-ISO_8859_13-2.3.3 iso8859-14 glibc-ISO_8859_14-2.1.2 iso8859-15 glibc-ISO_8859_15-2.1.2 iso8859-16 glibc-ISO_8859_16-2.3.3 } - # Dictionary Character map for Tcl encoding + # Array keyed by Tcl encoding name. Each element contains mapping of + # Unicode code point -> byte sequence for that encoding as a flat list + # (or dictionary). Both are stored as hex strings variable charMap + + # Array keyed by Tcl encoding name. List of invalid code sequences + # each being a hex string. + variable invalidCodeSequences + + # Array keyed by Tcl encoding name. List of unicode code points that are + # not mapped, each being a hex string. + variable unmappedCodePoints + + # The fallback character per encoding + variable encSubchar } proc ucm::abort {msg} { @@ -68,7 +81,11 @@ proc ucm::print {s} { puts $outputChan $s } -proc ucm::parse_SBCS {fd} { +proc ucm::parse_SBCS {encName fd} { + variable charMap + variable invalidCodeSequences + variable unmappedCodePoints + set result {} while {[gets $fd line] >= 0} { if {[string match #* $line]} { @@ -87,26 +104,44 @@ proc ucm::parse_SBCS {fd} { # It is a fallback mapping - ignore } } - return $result -} + set charMap($encName) $result -proc ucm::generate_tests {} { - variable encNameMap - variable charMap - variable outputPath - variable outputChan - - if {[info exists outputPath]} { - set outputChan [open $outputPath w] - } else { - set outputChan stdout + # Find out invalid code sequences and unicode code points that are not mapped + set valid {} + set mapped {} + foreach {unich bytes} $result { + lappend mapped $unich + lappend valid $bytes + } + set invalidCodeSequences($encName) {} + for {set i 0} {$i <= 255} {incr i} { + set hex [format %.2X $i] + if {[lsearch -exact $valid $hex] < 0} { + lappend invalidCodeSequences($encName) $hex + } } - array set tclNames {} - foreach encName [encoding names] { - set tclNames($encName) "" + set unmappedCodePoints($encName) {} + for {set i 0} {$i <= 65535} {incr i} { + set hex [format %.4X $i] + if {[lsearch -exact $mapped $hex] < 0} { + lappend unmappedCodePoints($encName) $hex + # Only look for (at most) one below 256 and one above 1024 + if {$i < 255} { + # Found one so jump past 8 bits + set i 255 + } else { + break + } + } + if {$i == 255} { + set i 1023 + } } + lappend unmappedCodePoints($encName) D800 DC00 10000 10FFFF +} +proc ucm::generate_boilerplate {} { # Common procedures print { # This file is automatically generated by ucm2tests.tcl. @@ -118,6 +153,7 @@ proc ucm::generate_tests {} { proc ucmConvertfromMismatches {enc map} { set mismatches {} foreach {unihex hex} $map { + set unihex [string range 00000000$unihex end-7 end]; # Make 8 digits set unich [subst "\\U$unihex"] if {[encoding convertfrom -profile strict $enc [binary decode hex $hex]] ne $unich} { lappend mismatches "<[printable $unich],$hex>" @@ -128,6 +164,7 @@ proc ucmConvertfromMismatches {enc map} { proc ucmConverttoMismatches {enc map} { set mismatches {} foreach {unihex hex} $map { + set unihex [string range 00000000$unihex end-7 end]; # Make 8 digits set unich [subst "\\U$unihex"] if {[encoding convertto -profile strict $enc $unich] ne [binary decode hex $hex]} { lappend mismatches "<[printable $unich],$hex>" @@ -154,6 +191,30 @@ if {[info commands printable] eq ""} { } } } +} ; # generate_boilerplate + +proc ucm::generate_tests {} { + variable encNameMap + variable charMap + variable invalidCodeSequences + variable unmappedCodePoints + variable outputPath + variable outputChan + variable encSubchar + + if {[info exists outputPath]} { + set outputChan [open $outputPath w] + fconfigure $outputChan -translation lf + } else { + set outputChan stdout + } + + array set tclNames {} + foreach encName [encoding names] { + set tclNames($encName) "" + } + + generate_boilerplate foreach encName [lsort -dictionary [array names encNameMap]] { if {![info exists charMap($encName)]} { warn "No character map read for $encName" @@ -161,6 +222,7 @@ if {[info commands printable] eq ""} { } unset tclNames($encName) + # Print the valid tests print "\n#\n# $encName (generated from $encNameMap($encName))" print "\ntest encoding-convertfrom-ucmCompare-$encName {Compare against ICU UCM} -body \{" print " ucmConvertfromMismatches $encName {$charMap($encName)}" @@ -172,13 +234,42 @@ if {[info commands printable] eq ""} { # This will generate individual tests for every char # and test in lead, tail, middle, solo configurations # but takes considerable time - print "lappend encValidStrings {*}{" + print "lappend encValidStrings \{*\}\{" foreach {unich hex} $charMap($encName) { print " $encName \\u$unich $hex {} {}" } - print "}; # $encName" + print "\}; # $encName" + } + + # Generate the invalidity checks + print "\n# $encName - invalid byte sequences" + print "lappend encInvalidBytes \{*\}\{" + foreach hex $invalidCodeSequences($encName) { + # Map XXXX... to \xXX\xXX... + set uhex [regsub -all .. $hex {\\x\0}] + set uhex \\U[string range 00000000$hex end-7 end] + print " $encName $hex tcl8 $uhex -1 {} {}" + print " $encName $hex replace \\uFFFD -1 {} {}" + print " $encName $hex strict {} 0 {} {}" + } + print "\}; # $encName" + + print "\n# $encName - invalid byte sequences" + print "lappend encUnencodableStrings \{*\}\{" + if {[info exists encSubchar($encName)]} { + set subchar $encSubchar($encName) + } else { + set subchar "3F"; # Tcl uses ? by default } + foreach hex $unmappedCodePoints($encName) { + set uhex \\U[string range 00000000$hex end-7 end] + print " $encName $uhex tcl8 $subchar -1 {} {}" + print " $encName $uhex replace $subchar -1 {} {}" + print " $encName $uhex strict {} 0 {} {}" + } + print "\}; # $encName" } + if {[array size tclNames]} { warn "Missing encoding: [lsort [array names tclNames]]" } @@ -190,6 +281,8 @@ if {[info commands printable] eq ""} { proc ucm::parse_file {encName ucmPath} { variable charMap + variable encSubchar + set fd [open $ucmPath] try { # Parse the metadata @@ -205,7 +298,7 @@ proc ucm::parse_file {encName ucmPath} { } } if {![info exists state(charmap)]} { - abort "Error: $path has No CHARMAP line." + abort "Error: $ucmPath has No CHARMAP line." } foreach key {code_set_name uconv_class} { if {[info exists state($key)]} { @@ -216,18 +309,22 @@ proc ucm::parse_file {encName ucmPath} { abort "Duplicate file for $encName ($path)" } if {![info exists state(uconv_class)]} { - abort "Error: $path has no uconv_class definition." + abort "Error: $ucmPath has no uconv_class definition." + } + if {[info exists state(subchar)]} { + # \xNN\xNN.. -> NNNN.. + set encSubchar($encName) [string map {\\x {}} $state(subchar)] } switch -exact -- $state(uconv_class) { SBCS { if {[catch { - set charMap($encName) [parse_SBCS $fd] + parse_SBCS $encName $fd } result]} { - abort "Could not process $path. $result" + abort "Could not process $ucmPath. $result" } } default { - log "Skipping $path -- not SBCS encoding." + log "Skipping $ucmPath -- not SBCS encoding." return } } @@ -236,15 +333,6 @@ proc ucm::parse_file {encName ucmPath} { } } -proc ucm::expand_paths {patterns} { - set expanded {} - foreach pat $patterns { - # The file join is for \ -> / - lappend expanded {*}[glob -nocomplain [file join $pat]] - } - return $expanded -} - proc ucm::run {} { variable encNameMap variable outputPath -- cgit v0.12 From 1d76ffb03b359c7f557943523fd9b0c49a312554 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Wed, 22 Feb 2023 20:44:13 +0000 Subject: minor bug-fix in utf-16/utf-32: 2 testcases failed in Tcl 9 compatibility mode (-DTCL_NO_DEPRECATED) --- generic/tclEncoding.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 0490831..8e13b43 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -237,8 +237,13 @@ static Tcl_EncodingConvertProc Iso88591ToUtfProc; */ static const Tcl_ObjType encodingType = { - "encoding", FreeEncodingInternalRep, DupEncodingInternalRep, NULL, NULL + "encoding", + FreeEncodingInternalRep, + DupEncodingInternalRep, + NULL, + NULL }; + #define EncodingSetInternalRep(objPtr, encoding) \ do { \ Tcl_ObjInternalRep ir; \ @@ -461,7 +466,7 @@ FillEncodingFileMap(void) map = Tcl_NewDictObj(); Tcl_IncrRefCount(map); - for (i = numDirs-1; i >= 0; i--) { + for (i = numDirs-1; i != TCL_INDEX_NONE; i--) { /* * Iterate backwards through the search path so as we overwrite * entries found, we favor files earlier on the search path. @@ -1182,7 +1187,7 @@ Tcl_ExternalToUtfDString( * Tcl_ExternalToUtfDStringEx -- * * Convert a source buffer from the specified encoding into UTF-8. -* The parameter flags controls the behavior, if any of the bytes in + * The parameter flags controls the behavior, if any of the bytes in * the source buffer are invalid or cannot be represented in utf-8. * Possible flags values: * TCL_ENCODING_STOPONERROR: don't replace invalid characters/bytes but @@ -1458,8 +1463,9 @@ Tcl_UtfToExternalDStringEx( char *dst; Tcl_EncodingState state; const Encoding *encodingPtr; - int dstLen, result, soFar, srcRead, dstWrote, dstChars; + int result, soFar, srcRead, dstWrote, dstChars; const char *srcStart = src; + int dstLen; Tcl_DStringInit(dstPtr); dst = Tcl_DStringValue(dstPtr); @@ -2627,9 +2633,10 @@ Utf32ToUtfProc( result = TCL_CONVERT_NOSPACE; } else { /* destination is not full, so we really are at the end now */ - if (flags & TCL_ENCODING_STOPONERROR) { + if ((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT) { result = TCL_CONVERT_SYNTAX; } else { + result = TCL_OK; dst += Tcl_UniCharToUtf(0xFFFD, dst); numChars++; src += bytesLeft; @@ -2854,9 +2861,10 @@ Utf16ToUtfProc( result = TCL_CONVERT_NOSPACE; } else { /* destination is not full, so we really are at the end now */ - if (flags & TCL_ENCODING_STOPONERROR) { + if (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT)) { result = TCL_CONVERT_SYNTAX; } else { + result = TCL_OK; dst += Tcl_UniCharToUtf(0xFFFD, dst); numChars++; src++; -- cgit v0.12 From d1920b380d4a987240715b3ce72f7d68dfca2b09 Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Thu, 23 Feb 2023 10:22:58 +0000 Subject: Fix gcc warnings and encoding error message (bug [40c61a5d10]) --- generic/tclCmdAH.c | 2 +- generic/tclEncoding.c | 4 ++-- tests/cmdAH.test | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/generic/tclCmdAH.c b/generic/tclCmdAH.c index 4dfb541..1b74064 100644 --- a/generic/tclCmdAH.c +++ b/generic/tclCmdAH.c @@ -589,7 +589,7 @@ numArgsError: /* ONLY jump here if nothing needs to be freed!!! */ interp, 1, objv, - "??-profile profile? ?-failindex var? ?encoding?? data"); + "? ?-profile profile? ?-failindex var? encoding ? data"); return TCL_ERROR; } diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index bc830b4..a877468 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -4265,7 +4265,7 @@ TclEncodingProfileNameToId( const char *profileName, /* Name of profile */ int *profilePtr) /* Output */ { - int i; + size_t i; for (i = 0; i < sizeof(encodingProfiles) / sizeof(encodingProfiles[0]); ++i) { if (!strcmp(profileName, encodingProfiles[i].name)) { @@ -4305,7 +4305,7 @@ TclEncodingProfileIdToName( Tcl_Interp *interp, /* For error messages. May be NULL */ int profileValue) /* Profile #define value */ { - int i; + size_t i; for (i = 0; i < sizeof(encodingProfiles) / sizeof(encodingProfiles[0]); ++i) { if (profileValue == encodingProfiles[i].value) { diff --git a/tests/cmdAH.test b/tests/cmdAH.test index cfde678..d76607c 100644 --- a/tests/cmdAH.test +++ b/tests/cmdAH.test @@ -175,8 +175,8 @@ test cmdAH-3.2 {Tcl_ContinueObjCmd, success} { # encoding command set "numargErrors(encoding system)" {^wrong # args: should be "(encoding |::tcl::encoding::)system \?encoding\?"$} -set "numargErrors(encoding convertfrom)" {^wrong # args: should be "(encoding |::tcl::encoding::)convertfrom \?\?-profile profile\? \?-failindex var\? \?encoding\?\? data"$} -set "numargErrors(encoding convertto)" {^wrong # args: should be "(encoding |::tcl::encoding::)convertto \?\?-profile profile\? \?-failindex var\? \?encoding\?\? data"$} +set "numargErrors(encoding convertfrom)" {^wrong # args: should be "(encoding |::tcl::encoding::)convertfrom \? \?-profile profile\? \?-failindex var\? encoding \? data"$} +set "numargErrors(encoding convertto)" {^wrong # args: should be "(encoding |::tcl::encoding::)convertto \? \?-profile profile\? \?-failindex var\? encoding \? data"$} set "numargErrors(encoding names)" {wrong # args: should be "encoding names"} set "numargErrors(encoding profiles)" {wrong # args: should be "encoding profiles"} -- cgit v0.12 From bf7b1adb896dbe4f79efb038aa0ecaebbdd3919c Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Thu, 23 Feb 2023 10:44:56 +0000 Subject: See [d19fe0a5b] for follow-up to previous commit --- generic/tclEncoding.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index ecec6e9..2b3b614 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -2516,13 +2516,10 @@ UnicodeToUtfProc( result = TCL_CONVERT_NOSPACE; } else { /* destination is not full, so we really are at the end now */ - if (flags & TCL_ENCODING_STOPONERROR) { - result = TCL_CONVERT_SYNTAX; - } else { - dst += Tcl_UniCharToUtf(0xFFFD, dst); - numChars++; - src++; - } + result = TCL_OK; + dst += Tcl_UniCharToUtf(0xFFFD, dst); + numChars++; + src++; } } *srcReadPtr = src - srcStart; -- cgit v0.12 From da915fdadfa41477f967f92d37c63e278621acd7 Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Thu, 23 Feb 2023 13:19:45 +0000 Subject: New signature for Tcl_ExternalToUtfDStringEx and Tcl_UtfToExternalDStringEx as per TIP 656 --- generic/tcl.decls | 14 ++-- generic/tclCmdAH.c | 99 +++++++++++++++++++++++++--- generic/tclDecls.h | 18 +++--- generic/tclEncoding.c | 174 +++++++++++++++++++++++++++++++++++++++----------- tests/cmdAH.test | 24 +++++-- 5 files changed, 264 insertions(+), 65 deletions(-) diff --git a/generic/tcl.decls b/generic/tcl.decls index a48ab02..a789ef6 100644 --- a/generic/tcl.decls +++ b/generic/tcl.decls @@ -2441,13 +2441,17 @@ declare 656 { declare 657 { int Tcl_UniCharIsUnicode(int ch) } + +# TIP 656 declare 658 { - Tcl_Size Tcl_ExternalToUtfDStringEx(Tcl_Encoding encoding, - const char *src, Tcl_Size srcLen, int flags, Tcl_DString *dsPtr) -} + int Tcl_ExternalToUtfDStringEx(Tcl_Interp *interp, Tcl_Encoding encoding, + const char *src, int srcLen, int flags, Tcl_DString *dsPtr, + Tcl_Size *errorLocationPtr) +} declare 659 { - Tcl_Size Tcl_UtfToExternalDStringEx(Tcl_Encoding encoding, - const char *src, Tcl_Size srcLen, int flags, Tcl_DString *dsPtr) + int Tcl_UtfToExternalDStringEx(Tcl_Interp *interp, Tcl_Encoding encoding, + const char *src, int srcLen, int flags, Tcl_DString *dsPtr, + Tcl_Size *errorLocationPtr) } # TIP #511 diff --git a/generic/tclCmdAH.c b/generic/tclCmdAH.c index 1b74064..24b2038 100644 --- a/generic/tclCmdAH.c +++ b/generic/tclCmdAH.c @@ -671,6 +671,7 @@ EncodingConvertfromObjCmd( int flags; int result; Tcl_Obj *failVarObj; + Tcl_Size errorLocation; if (EncodingConvertParseOptions( interp, objc, objv, &encoding, &data, &flags, &failVarObj) @@ -693,8 +694,47 @@ EncodingConvertfromObjCmd( if (bytesPtr == NULL) { return TCL_ERROR; } - result = Tcl_ExternalToUtfDStringEx(encoding, bytesPtr, length, - flags, &ds); + result = Tcl_ExternalToUtfDStringEx(interp, encoding, bytesPtr, length, flags, + &ds, failVarObj ? &errorLocation : NULL); + /* NOTE: ds must be freed beyond this point even on error */ + switch (result) { + case TCL_OK: + errorLocation = TCL_INDEX_NONE; + break; + case TCL_ERROR: + /* Error in parameters. Should not happen. interp will have error */ + Tcl_DStringFree(&ds); + return TCL_ERROR; + default: + /* + * One of the TCL_CONVERT_* errors. If we were not interested in the + * error location, interp result would already have been filled in + * and we can just return the error. Otherwise, we have to return + * what could be decoded and the returned error location. + */ + if (failVarObj == NULL) { + Tcl_DStringFree(&ds); + return TCL_ERROR; + } + break; + } + + /* + * TCL_OK or a TCL_CONVERT_* error where the caller wants back as much + * data as was converted. + */ + if (failVarObj) { + /* I hope, wide int will cover Tcl_Size data type */ + if (Tcl_ObjSetVar2(interp, + failVarObj, + NULL, + Tcl_NewWideIntObj(errorLocation), + TCL_LEAVE_ERR_MSG) == NULL) { + Tcl_DStringFree(&ds); + return TCL_ERROR; + } + } +#ifdef OBSOLETE if (result != TCL_INDEX_NONE && TCL_ENCODING_PROFILE_GET(flags) != TCL_ENCODING_PROFILE_TCL8) { if (failVarObj != NULL) { @@ -717,6 +757,7 @@ EncodingConvertfromObjCmd( return TCL_ERROR; } } +#endif /* * Note that we cannot use Tcl_DStringResult here because it will @@ -725,9 +766,7 @@ EncodingConvertfromObjCmd( Tcl_SetObjResult(interp, Tcl_DStringToObj(&ds)); - /* - * We're done with the encoding - */ + /* We're done with the encoding */ Tcl_FreeEncoding(encoding); return TCL_OK; @@ -763,6 +802,7 @@ EncodingConverttoObjCmd( int result; int flags; Tcl_Obj *failVarObj; + Tcl_Size errorLocation; if (EncodingConvertParseOptions( interp, objc, objv, &encoding, &data, &flags, &failVarObj) @@ -775,8 +815,47 @@ EncodingConverttoObjCmd( */ stringPtr = TclGetStringFromObj(data, &length); - result = Tcl_UtfToExternalDStringEx(encoding, stringPtr, length, - flags, &ds); + result = Tcl_UtfToExternalDStringEx(interp, encoding, stringPtr, length, flags, + &ds, failVarObj ? &errorLocation : NULL); + /* NOTE: ds must be freed beyond this point even on error */ + + switch (result) { + case TCL_OK: + errorLocation = TCL_INDEX_NONE; + break; + case TCL_ERROR: + /* Error in parameters. Should not happen. interp will have error */ + Tcl_DStringFree(&ds); + return TCL_ERROR; + default: + /* + * One of the TCL_CONVERT_* errors. If we were not interested in the + * error location, interp result would already have been filled in + * and we can just return the error. Otherwise, we have to return + * what could be decoded and the returned error location. + */ + if (failVarObj == NULL) { + Tcl_DStringFree(&ds); + return TCL_ERROR; + } + break; + } + /* + * TCL_OK or a TCL_CONVERT_* error where the caller wants back as much + * data as was converted. + */ + if (failVarObj) { + /* I hope, wide int will cover Tcl_Size data type */ + if (Tcl_ObjSetVar2(interp, + failVarObj, + NULL, + Tcl_NewWideIntObj(errorLocation), + TCL_LEAVE_ERR_MSG) == NULL) { + Tcl_DStringFree(&ds); + return TCL_ERROR; + } + } +#ifdef OBSOLETE if (result != TCL_INDEX_NONE && TCL_ENCODING_PROFILE_GET(flags) != TCL_ENCODING_PROFILE_TCL8) { if (failVarObj != NULL) { @@ -802,14 +881,14 @@ EncodingConverttoObjCmd( return TCL_ERROR; } } +#endif + Tcl_SetObjResult(interp, Tcl_NewByteArrayObj((unsigned char*) Tcl_DStringValue(&ds), Tcl_DStringLength(&ds))); Tcl_DStringFree(&ds); - /* - * We're done with the encoding - */ + /* We're done with the encoding */ Tcl_FreeEncoding(encoding); return TCL_OK; diff --git a/generic/tclDecls.h b/generic/tclDecls.h index 77517e8..fbfa8a1 100644 --- a/generic/tclDecls.h +++ b/generic/tclDecls.h @@ -1955,13 +1955,15 @@ EXTERN const char * Tcl_UtfPrev(const char *src, const char *start); /* 657 */ EXTERN int Tcl_UniCharIsUnicode(int ch); /* 658 */ -EXTERN Tcl_Size Tcl_ExternalToUtfDStringEx(Tcl_Encoding encoding, - const char *src, Tcl_Size srcLen, int flags, - Tcl_DString *dsPtr); +EXTERN int Tcl_ExternalToUtfDStringEx(Tcl_Interp *interp, + Tcl_Encoding encoding, const char *src, + int srcLen, int flags, Tcl_DString *dsPtr, + Tcl_Size *errorLocationPtr); /* 659 */ -EXTERN Tcl_Size Tcl_UtfToExternalDStringEx(Tcl_Encoding encoding, - const char *src, Tcl_Size srcLen, int flags, - Tcl_DString *dsPtr); +EXTERN int Tcl_UtfToExternalDStringEx(Tcl_Interp *interp, + Tcl_Encoding encoding, const char *src, + int srcLen, int flags, Tcl_DString *dsPtr, + Tcl_Size *errorLocationPtr); /* 660 */ EXTERN int Tcl_AsyncMarkFromSignal(Tcl_AsyncHandler async, int sigNumber); @@ -2741,8 +2743,8 @@ typedef struct TclStubs { const char * (*tcl_UtfNext) (const char *src); /* 655 */ const char * (*tcl_UtfPrev) (const char *src, const char *start); /* 656 */ int (*tcl_UniCharIsUnicode) (int ch); /* 657 */ - Tcl_Size (*tcl_ExternalToUtfDStringEx) (Tcl_Encoding encoding, const char *src, Tcl_Size srcLen, int flags, Tcl_DString *dsPtr); /* 658 */ - Tcl_Size (*tcl_UtfToExternalDStringEx) (Tcl_Encoding encoding, const char *src, Tcl_Size srcLen, int flags, Tcl_DString *dsPtr); /* 659 */ + int (*tcl_ExternalToUtfDStringEx) (Tcl_Interp *interp, Tcl_Encoding encoding, const char *src, int srcLen, int flags, Tcl_DString *dsPtr, Tcl_Size *errorLocationPtr); /* 658 */ + int (*tcl_UtfToExternalDStringEx) (Tcl_Interp *interp, Tcl_Encoding encoding, const char *src, int srcLen, int flags, Tcl_DString *dsPtr, Tcl_Size *errorLocationPtr); /* 659 */ int (*tcl_AsyncMarkFromSignal) (Tcl_AsyncHandler async, int sigNumber); /* 660 */ int (*tclListObjGetElements) (Tcl_Interp *interp, Tcl_Obj *listPtr, size_t *objcPtr, Tcl_Obj ***objvPtr); /* 661 */ int (*tclListObjLength) (Tcl_Interp *interp, Tcl_Obj *listPtr, size_t *lengthPtr); /* 662 */ diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index daab3a9..365aa90 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -1203,7 +1203,8 @@ Tcl_ExternalToUtfDString( Tcl_DString *dstPtr) /* Uninitialized or free DString in which the * converted string is stored. */ { - Tcl_ExternalToUtfDStringEx(encoding, src, srcLen, TCL_ENCODING_PROFILE_TCL8, dstPtr); + Tcl_ExternalToUtfDStringEx( + NULL, encoding, src, srcLen, TCL_ENCODING_PROFILE_TCL8, dstPtr, NULL); return Tcl_DStringValue(dstPtr); } @@ -1223,29 +1224,49 @@ Tcl_ExternalToUtfDString( * to TCL_ENCODING_PROFILE_STRICT overriding any specified profile flags * - TCL_ENCODING_MODIFIED: enable Tcl internal conversion mapping \xC0\x80 * to 0x00. Only valid for "utf-8" and "cesu-8". + * Any other flag bits will cause an error to be returned (for future + * compatibility) * * Results: - * The converted bytes are stored in the DString, which is then NULL - * terminated in an encoding-specific manner. The return value is - * the error position in the source string or -1 if no conversion error - * is reported. - * + * The return value is one of + * TCL_OK: success. Converted string in *dstPtr + * TCL_ERROR: error in passed parameters. Error message in interp + * TCL_CONVERT_MULTIBYTE: source ends in truncated multibyte sequence + * TCL_CONVERT_SYNTAX: source is not conformant to encoding definition + * TCL_CONVERT_UNKNOWN: source contained a character that could not + * be represented in target encoding. + * * Side effects: - * None. + * + * TCL_OK: The converted bytes are stored in the DString and NUL + * terminated in an encoding-specific manner. + * TCL_ERROR: an error, message is stored in the interp if not NULL. + * TCL_CONVERT_*: if errorLocPtr is NULL, an error message is stored + * in the interpreter (if not NULL). If errorLocPtr is not NULL, + * no error message is stored as it is expected the caller is + * interested in whatever is decoded so far and not treating this + * as an error condition. + * + * In addition, *dstPtr is always initialized and must be cleared + * by the caller irrespective of the return code. * *------------------------------------------------------------------------- */ int Tcl_ExternalToUtfDStringEx( + Tcl_Interp *interp, /* For error messages. May be NULL. */ Tcl_Encoding encoding, /* The encoding for the source string, or NULL * for the default system encoding. */ const char *src, /* Source string in specified encoding. */ int srcLen, /* Source string length in bytes, or < 0 for * encoding-specific string length. */ int flags, /* Conversion control flags. */ - Tcl_DString *dstPtr) /* Uninitialized or free DString in which the + Tcl_DString *dstPtr, /* Uninitialized or free DString in which the * converted string is stored. */ + Tcl_Size *errorLocPtr) /* Where to store the error location + (or TCL_INDEX_NONE if no error). May + be NULL. */ { char *dst; Tcl_EncodingState state; @@ -1253,14 +1274,14 @@ Tcl_ExternalToUtfDStringEx( int dstLen, result, soFar, srcRead, dstWrote, dstChars; const char *srcStart = src; - Tcl_DStringInit(dstPtr); + Tcl_DStringInit(dstPtr); /* Must always be initialized before returning */ dst = Tcl_DStringValue(dstPtr); dstLen = dstPtr->spaceAvl - 1; if (encoding == NULL) { - encoding = systemEncoding; + encoding = systemEncoding; } - encodingPtr = (Encoding *) encoding; + encodingPtr = (Encoding *)encoding; if (src == NULL) { srcLen = 0; @@ -1275,26 +1296,53 @@ Tcl_ExternalToUtfDStringEx( } while (1) { - result = encodingPtr->toUtfProc(encodingPtr->clientData, src, srcLen, - flags, &state, dst, dstLen, &srcRead, &dstWrote, &dstChars); - soFar = dst + dstWrote - Tcl_DStringValue(dstPtr); + result = encodingPtr->toUtfProc(encodingPtr->clientData, src, + srcLen, flags, &state, dst, dstLen, + &srcRead, &dstWrote, &dstChars); + soFar = dst + dstWrote - Tcl_DStringValue(dstPtr); + + src += srcRead; + if (result != TCL_CONVERT_NOSPACE) { + Tcl_Size nBytesProcessed = (Tcl_Size)(src - srcStart); + + Tcl_DStringSetLength(dstPtr, soFar); + if (errorLocPtr) { + /* + * Do not write error message into interpreter if caller + * wants to know error location. + */ + *errorLocPtr = result == TCL_OK ? TCL_INDEX_NONE : nBytesProcessed; + } + else { + /* Caller wants error message on failure */ + if (result != TCL_OK && interp != NULL) { + char buf[TCL_INTEGER_SPACE]; + sprintf(buf, "%u", nBytesProcessed); + Tcl_SetObjResult( + interp, + Tcl_ObjPrintf("unexpected byte sequence starting at index %" + "u: '\\x%X'", + nBytesProcessed, + UCHAR(srcStart[nBytesProcessed]))); + Tcl_SetErrorCode( + interp, "TCL", "ENCODING", "ILLEGALSEQUENCE", buf, NULL); + } + } + return result; + } - src += srcRead; - if (result != TCL_CONVERT_NOSPACE) { - Tcl_DStringSetLength(dstPtr, soFar); - return (result == TCL_OK) ? TCL_INDEX_NONE : (int)(src - srcStart); - } - flags &= ~TCL_ENCODING_START; - srcLen -= srcRead; - if (Tcl_DStringLength(dstPtr) == 0) { - Tcl_DStringSetLength(dstPtr, dstLen); - } - Tcl_DStringSetLength(dstPtr, 2 * Tcl_DStringLength(dstPtr) + 1); - dst = Tcl_DStringValue(dstPtr) + soFar; - dstLen = Tcl_DStringLength(dstPtr) - soFar - 1; + /* Expand space and continue */ + flags &= ~TCL_ENCODING_START; + srcLen -= srcRead; + if (Tcl_DStringLength(dstPtr) == 0) { + Tcl_DStringSetLength(dstPtr, dstLen); + } + Tcl_DStringSetLength(dstPtr, 2 * Tcl_DStringLength(dstPtr) + 1); + dst = Tcl_DStringValue(dstPtr) + soFar; + dstLen = Tcl_DStringLength(dstPtr) - soFar - 1; } } - + /* *------------------------------------------------------------------------- * @@ -1441,7 +1489,8 @@ Tcl_UtfToExternalDString( Tcl_DString *dstPtr) /* Uninitialized or free DString in which the * converted string is stored. */ { - Tcl_UtfToExternalDStringEx(encoding, src, srcLen, TCL_ENCODING_PROFILE_DEFAULT, dstPtr); + Tcl_UtfToExternalDStringEx( + NULL, encoding, src, srcLen, TCL_ENCODING_PROFILE_DEFAULT, dstPtr, NULL); return Tcl_DStringValue(dstPtr); } @@ -1462,27 +1511,45 @@ Tcl_UtfToExternalDString( * of 0x00. Only valid for "utf-8" and "cesu-8". * * Results: - * The converted bytes are stored in the DString, which is then NULL - * terminated in an encoding-specific manner. The return value is - * the error position in the source string or -1 if no conversion error - * is reported. + * The return value is one of + * TCL_OK: success. Converted string in *dstPtr + * TCL_ERROR: error in passed parameters. Error message in interp + * TCL_CONVERT_MULTIBYTE: source ends in truncated multibyte sequence + * TCL_CONVERT_SYNTAX: source is not conformant to encoding definition + * TCL_CONVERT_UNKNOWN: source contained a character that could not + * be represented in target encoding. * * Side effects: - * None. + * + * TCL_OK: The converted bytes are stored in the DString and NUL + * terminated in an encoding-specific manner + * TCL_ERROR: an error, message is stored in the interp if not NULL. + * TCL_CONVERT_*: if errorLocPtr is NULL, an error message is stored + * in the interpreter (if not NULL). If errorLocPtr is not NULL, + * no error message is stored as it is expected the caller is + * interested in whatever is decoded so far and not treating this + * as an error condition. + * + * In addition, *dstPtr is always initialized and must be cleared + * by the caller irrespective of the return code. * *------------------------------------------------------------------------- */ int Tcl_UtfToExternalDStringEx( + Tcl_Interp *interp, /* For error messages. May be NULL. */ Tcl_Encoding encoding, /* The encoding for the converted string, or * NULL for the default system encoding. */ const char *src, /* Source string in UTF-8. */ int srcLen, /* Source string length in bytes, or < 0 for * strlen(). */ int flags, /* Conversion control flags. */ - Tcl_DString *dstPtr) /* Uninitialized or free DString in which the + Tcl_DString *dstPtr, /* Uninitialized or free DString in which the * converted string is stored. */ + Tcl_Size *errorLocPtr) /* Where to store the error location + (or TCL_INDEX_NONE if no error). May + be NULL. */ { char *dst; Tcl_EncodingState state; @@ -1505,21 +1572,49 @@ Tcl_UtfToExternalDStringEx( } else if (srcLen < 0) { srcLen = strlen(src); } + flags = TclEncodingExternalFlagsToInternal(flags); flags |= TCL_ENCODING_START | TCL_ENCODING_END; while (1) { result = encodingPtr->fromUtfProc(encodingPtr->clientData, src, - srcLen, flags, &state, dst, dstLen, - &srcRead, &dstWrote, &dstChars); + srcLen, flags, &state, dst, dstLen, + &srcRead, &dstWrote, &dstChars); soFar = dst + dstWrote - Tcl_DStringValue(dstPtr); src += srcRead; if (result != TCL_CONVERT_NOSPACE) { + Tcl_Size nBytesProcessed = (Tcl_Size)(src - srcStart); int i = soFar + encodingPtr->nullSize - 1; while (i >= soFar) { Tcl_DStringSetLength(dstPtr, i--); } - return (result == TCL_OK) ? TCL_INDEX_NONE : (int)(src - srcStart); + if (errorLocPtr) { + /* + * Do not write error message into interpreter if caller + * wants to know error location. + */ + *errorLocPtr = result == TCL_OK ? TCL_INDEX_NONE : nBytesProcessed; + } + else { + /* Caller wants error message on failure */ + if (result != TCL_OK && interp != NULL) { + Tcl_Size pos = Tcl_NumUtfChars(srcStart, nBytesProcessed); + int ucs4; + char buf[TCL_INTEGER_SPACE]; + TclUtfToUCS4(&srcStart[nBytesProcessed], &ucs4); + sprintf(buf, "%u", nBytesProcessed); + Tcl_SetObjResult( + interp, + Tcl_ObjPrintf( + "unexpected character at index %" TCL_Z_MODIFIER + "u: 'U+%06X'", + pos, + ucs4)); + Tcl_SetErrorCode(interp, "TCL", "ENCODING", "ILLEGALSEQUENCE", + buf, NULL); + } + } + return result; } flags &= ~TCL_ENCODING_START; @@ -2682,6 +2777,8 @@ Utf32ToUtfProc( /* Bug [10c2c17c32]. If Hi surrogate, finish 3-byte UTF-8 */ dst += Tcl_UniCharToUtf(-1, dst); } + + /* * If we had a truncated code unit at the end AND this is the last * fragment AND profile is not "strict", stick FFFD in its place. @@ -2917,6 +3014,7 @@ Utf16ToUtfProc( /* Bug [10c2c17c32]. If Hi surrogate, finish 3-byte UTF-8 */ dst += Tcl_UniCharToUtf(-1, dst); } + /* * If we had a truncated code unit at the end AND this is the last * fragment AND profile is not "strict", stick FFFD in its place. diff --git a/tests/cmdAH.test b/tests/cmdAH.test index f8eba4e..471d46a 100644 --- a/tests/cmdAH.test +++ b/tests/cmdAH.test @@ -703,15 +703,25 @@ lappend encInvalidBytes {*}{ # happen when the sequence is at the end (including by itself) Thus {solo tail} # in some cases. lappend encInvalidBytes {*}{ - utf-16le 41 tcl8 \uFFFD -1 {solo tail} {Truncated} - utf-16le 41 replace \uFFFD -1 {solo tail} {Truncated} - utf-16le 41 strict {} 0 {solo tail} {Truncated} + utf-16le 41 tcl8 \uFFFD -1 {solo tail} {Truncated} + utf-16le 41 replace \uFFFD -1 {solo tail} {Truncated} + utf-16le 41 strict {} 0 {solo tail} {Truncated} utf-16le 00D8 tcl8 \uD800 -1 {} {Missing low surrogate} utf-16le 00D8 replace \uFFFD -1 {knownBug} {Missing low surrogate} utf-16le 00D8 strict {} 0 {knownBug} {Missing low surrogate} utf-16le 00DC tcl8 \uDC00 -1 {} {Missing high surrogate} utf-16le 00DC replace \uFFFD -1 {knownBug} {Missing high surrogate} utf-16le 00DC strict {} 0 {knownBug} {Missing high surrogate} + + utf-16be 41 tcl8 \uFFFD -1 {solo tail} {Truncated} + utf-16be 41 replace \uFFFD -1 {solo tail} {Truncated} + utf-16be 41 strict {} 0 {solo tail} {Truncated} + utf-16be D800 tcl8 \uD800 -1 {} {Missing low surrogate} + utf-16be D800 replace \uFFFD -1 {knownBug} {Missing low surrogate} + utf-16be D800 strict {} 0 {knownBug} {Missing low surrogate} + utf-16be DC00 tcl8 \uDC00 -1 {} {Missing high surrogate} + utf-16be DC00 replace \uFFFD -1 {knownBug} {Missing high surrogate} + utf-16be DC00 strict {} 0 {knownBug} {Missing high surrogate} } # utf32-le and utf32-be test cases. Note utf32 cases are automatically generated @@ -727,7 +737,7 @@ lappend encInvalidBytes {*}{ utf-32le 4100 strict {} 0 {solo tail} {Truncated} utf-32le 410000 tcl8 \uFFFD -1 {solo tail} {Truncated} utf-32le 410000 replace \uFFFD -1 {solo} {Truncated} - utf-32le 410000 strict {} 0 {solo tail} {Truncated} + utf-32le 410000 strict {} 0 {solo tail} {Truncated} utf-32le 00D80000 tcl8 \uD800 -1 {} {High-surrogate} utf-32le 00D80000 replace \uFFFD -1 {} {High-surrogate} utf-32le 00D80000 strict {} 0 {} {High-surrogate} @@ -745,8 +755,14 @@ lappend encInvalidBytes {*}{ utf-32le FFFFFFFF strict {} 0 {} {Out of range} utf-32be 41 tcl8 \uFFFD -1 {solo tail} {Truncated} + utf-32be 41 replace \uFFFD -1 {solo tail} {Truncated} + utf-32be 41 strict {} 0 {solo tail} {Truncated} utf-32be 0041 tcl8 \uFFFD -1 {solo tail} {Truncated} + utf-32be 0041 replace \uFFFD -1 {solo} {Truncated} + utf-32be 0041 strict {} 0 {solo tail} {Truncated} utf-32be 000041 tcl8 \uFFFD -1 {solo tail} {Truncated} + utf-32be 000041 replace \uFFFD -1 {solo} {Truncated} + utf-32be 000041 strict {} 0 {solo tail} {Truncated} utf-32be 0000D800 tcl8 \uD800 -1 {} {High-surrogate} utf-32be 0000D800 replace \uFFFD -1 {} {High-surrogate} utf-32be 0000D800 strict {} 0 {} {High-surrogate} -- cgit v0.12 From 186cc71273a606360094ccb275bc239c6c17235a Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Thu, 23 Feb 2023 13:24:58 +0000 Subject: Had forgotten to remove disabled code --- generic/tclCmdAH.c | 52 ---------------------------------------------------- 1 file changed, 52 deletions(-) diff --git a/generic/tclCmdAH.c b/generic/tclCmdAH.c index 24b2038..93c3416 100644 --- a/generic/tclCmdAH.c +++ b/generic/tclCmdAH.c @@ -734,31 +734,6 @@ EncodingConvertfromObjCmd( return TCL_ERROR; } } -#ifdef OBSOLETE - if (result != TCL_INDEX_NONE && - TCL_ENCODING_PROFILE_GET(flags) != TCL_ENCODING_PROFILE_TCL8) { - if (failVarObj != NULL) { - if (Tcl_ObjSetVar2(interp, failVarObj, NULL, Tcl_NewWideIntObj(result), TCL_LEAVE_ERR_MSG) == NULL) { - return TCL_ERROR; - } - } else { - char buf[TCL_INTEGER_SPACE]; - sprintf(buf, "%u", result); - Tcl_SetObjResult(interp, Tcl_ObjPrintf("unexpected byte sequence starting at index %" - "u: '\\x%X'", result, UCHAR(bytesPtr[result]))); - Tcl_SetErrorCode(interp, "TCL", "ENCODING", "ILLEGALSEQUENCE", - buf, NULL); - Tcl_DStringFree(&ds); - return TCL_ERROR; - } - } - else if (failVarObj != NULL) { - if (Tcl_ObjSetVar2(interp, failVarObj, NULL, Tcl_NewIntObj(-1), TCL_LEAVE_ERR_MSG) == NULL) { - return TCL_ERROR; - } - } -#endif - /* * Note that we cannot use Tcl_DStringResult here because it will * truncate the string at the first null byte. @@ -855,33 +830,6 @@ EncodingConverttoObjCmd( return TCL_ERROR; } } -#ifdef OBSOLETE - if (result != TCL_INDEX_NONE && - TCL_ENCODING_PROFILE_GET(flags) != TCL_ENCODING_PROFILE_TCL8) { - if (failVarObj != NULL) { - /* I hope, wide int will cover size_t data type */ - if (Tcl_ObjSetVar2(interp, failVarObj, NULL, Tcl_NewWideIntObj(result), TCL_LEAVE_ERR_MSG) == NULL) { - return TCL_ERROR; - } - } else { - size_t pos = Tcl_NumUtfChars(stringPtr, result); - int ucs4; - char buf[TCL_INTEGER_SPACE]; - TclUtfToUCS4(&stringPtr[result], &ucs4); - sprintf(buf, "%u", result); - Tcl_SetObjResult(interp, Tcl_ObjPrintf("unexpected character at index %" - TCL_Z_MODIFIER "u: 'U+%06X'", pos, ucs4)); - Tcl_SetErrorCode(interp, "TCL", "ENCODING", "ILLEGALSEQUENCE", - buf, NULL); - Tcl_DStringFree(&ds); - return TCL_ERROR; - } - } else if (failVarObj != NULL) { - if (Tcl_ObjSetVar2(interp, failVarObj, NULL, Tcl_NewIntObj(-1), TCL_LEAVE_ERR_MSG) == NULL) { - return TCL_ERROR; - } - } -#endif Tcl_SetObjResult(interp, Tcl_NewByteArrayObj((unsigned char*) Tcl_DStringValue(&ds), -- cgit v0.12 From 10c559acbfbd8c8848e7f8fb9166e00e2aec2dc5 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Thu, 23 Feb 2023 21:20:21 +0000 Subject: Remove left-over traces of [0a74820b6d], which was merged into the apn-encoding-profile and landed into tip-656. This commit was merged premature into core-8-branch, leaving a [dab7fd5973|memory leak] --- generic/tclIO.c | 59 +------ generic/tclIOCmd.c | 25 +-- tests/io.test | 474 +++++++++++------------------------------------------ 3 files changed, 99 insertions(+), 459 deletions(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index 880b669..b12adf6 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -4645,7 +4645,6 @@ Tcl_GetsObj( /* State info for channel */ ChannelBuffer *bufPtr; int inEofChar, skip, copiedTotal, oldFlags, oldRemoved; - int reportError = 0; int oldLength; Tcl_Encoding encoding; char *dst, *dstEnd, *eol, *eof; @@ -4654,7 +4653,6 @@ Tcl_GetsObj( if (GotFlag(statePtr, CHANNEL_ENCODING_ERROR)) { UpdateInterest(chanPtr); Tcl_SetErrno(EILSEQ); - ResetFlag(statePtr, CHANNEL_ENCODING_ERROR); return TCL_INDEX_NONE; } @@ -4914,19 +4912,6 @@ Tcl_GetsObj( goto done; } goto gotEOL; - } else if (gs.bytesWrote == 0 - && GotFlag(statePtr, CHANNEL_ENCODING_ERROR)) { - /* Set eol to the position that caused the encoding error, and then - * coninue to gotEOL, which stores the data that was decoded - * without error to objPtr. This allows the caller to do something - * useful with the data decoded so far, and also results in the - * position of the file being the first byte that was not - * succesfully decoded, allowing further processing at exactly that - * point, if desired. - */ - eol = dstEnd; - reportError = 1; - goto gotEOL; } dst = dstEnd; } @@ -4970,16 +4955,7 @@ Tcl_GetsObj( Tcl_SetObjLength(objPtr, eol - objPtr->bytes); CommonGetsCleanup(chanPtr); ResetFlag(statePtr, CHANNEL_BLOCKED); - if (reportError) { - ResetFlag(statePtr, CHANNEL_BLOCKED|INPUT_SAW_CR|CHANNEL_ENCODING_ERROR); - /* reset CHANNEL_ENCODING_ERROR to afford a chance to reconfigure - * the channel and try again - */ - Tcl_SetErrno(EILSEQ); - copiedTotal = -1; - } else { - copiedTotal = gs.totalChars + gs.charsWrote - skip; - } + copiedTotal = gs.totalChars + gs.charsWrote - skip; goto done; /* @@ -6007,9 +5983,8 @@ DoReadChars( } if (GotFlag(statePtr, CHANNEL_ENCODING_ERROR)) { - /* TODO: UpdateInterest not needed here? */ + /* TODO: We don't need this call? */ UpdateInterest(chanPtr); - Tcl_SetErrno(EILSEQ); return -1; } @@ -6025,7 +6000,7 @@ DoReadChars( assert(statePtr->inputEncodingFlags & TCL_ENCODING_END); assert(!GotFlag(statePtr, CHANNEL_BLOCKED|INPUT_SAW_CR)); - /* TODO: UpdateInterest not needed here? */ + /* TODO: We don't need this call? */ UpdateInterest(chanPtr); return 0; } @@ -6039,7 +6014,7 @@ DoReadChars( } ResetFlag(statePtr, CHANNEL_BLOCKED|CHANNEL_EOF); statePtr->inputEncodingFlags &= ~TCL_ENCODING_END; - /* TODO: UpdateInterest not needed here? */ + /* TODO: We don't need this call? */ UpdateInterest(chanPtr); return 0; } @@ -6070,7 +6045,7 @@ DoReadChars( } /* - * Recycle current buffer if empty. + * If the current buffer is empty recycle it. */ bufPtr = statePtr->inQueueHead; @@ -6083,24 +6058,6 @@ DoReadChars( statePtr->inQueueTail = NULL; } } - - /* - * If CHANNEL_ENCODING_ERROR and CHANNEL_STICKY_EOF are both set, - * then CHANNEL_ENCODING_ERROR was caused by data that occurred - * after the EOF character was encountered, so it doesn't count as - * a real error. - */ - - if (GotFlag(statePtr, CHANNEL_ENCODING_ERROR) - && !GotFlag(statePtr, CHANNEL_STICKY_EOF) - && !GotFlag(statePtr, CHANNEL_NONBLOCKING)) { - /* Channel is synchronous. Return an error so that callers - * like [read] can return an error. - */ - Tcl_SetErrno(EILSEQ); - copied = -1; - goto finish; - } } if (copiedNow < 0) { @@ -6129,7 +6086,6 @@ DoReadChars( } } -finish: /* * Failure to fill a channel buffer may have left channel reporting a * "blocked" state, but so long as we fulfilled the request here, the @@ -6793,14 +6749,11 @@ TranslateInputEOL( * EOF character was seen in EOL translated range. Leave current file * position pointing at the EOF character, but don't store the EOF * character in the output string. - * - * If CHANNEL_ENCODING_ERROR is set, it can only be because of data - * encountered after the EOF character, so it is nonsense. Unset it. */ SetFlag(statePtr, CHANNEL_EOF | CHANNEL_STICKY_EOF); statePtr->inputEncodingFlags |= TCL_ENCODING_END; - ResetFlag(statePtr, CHANNEL_BLOCKED|INPUT_SAW_CR|CHANNEL_ENCODING_ERROR); + ResetFlag(statePtr, CHANNEL_BLOCKED|INPUT_SAW_CR); } } diff --git a/generic/tclIOCmd.c b/generic/tclIOCmd.c index 507e06c..e8a534f 100644 --- a/generic/tclIOCmd.c +++ b/generic/tclIOCmd.c @@ -296,9 +296,6 @@ Tcl_GetsObjCmd( int lineLen; /* Length of line just read. */ int mode; /* Mode in which channel is opened. */ Tcl_Obj *linePtr, *chanObjPtr; - /* - Tcl_Obj *resultDictPtr, *returnOptsPtr; - */ int code = TCL_OK; if ((objc != 2) && (objc != 3)) { @@ -321,6 +318,7 @@ Tcl_GetsObjCmd( lineLen = Tcl_GetsObj(chan, linePtr); if (lineLen < 0) { if (!Tcl_Eof(chan) && !Tcl_InputBlocked(chan)) { + Tcl_DecrRefCount(linePtr); /* * TIP #219. @@ -334,15 +332,6 @@ Tcl_GetsObjCmd( "error reading \"%s\": %s", TclGetString(chanObjPtr), Tcl_PosixError(interp))); } - /* - resultDictPtr = Tcl_NewDictObj(); - Tcl_DictObjPut(NULL, resultDictPtr, Tcl_NewStringObj("read", -1) - , linePtr); - returnOptsPtr = Tcl_NewDictObj(); - Tcl_DictObjPut(NULL, returnOptsPtr, Tcl_NewStringObj("-result", -1) - , resultDictPtr); - Tcl_SetReturnOptions(interp, returnOptsPtr); - */ code = TCL_ERROR; goto done; } @@ -393,9 +382,6 @@ Tcl_ReadObjCmd( int charactersRead; /* How many characters were read? */ int mode; /* Mode in which channel is opened. */ Tcl_Obj *resultPtr, *chanObjPtr; - /* - Tcl_Obj *resultDictPtr, *returnOptsPtr; - */ if ((objc != 2) && (objc != 3)) { Interp *iPtr; @@ -484,17 +470,8 @@ Tcl_ReadObjCmd( "error reading \"%s\": %s", TclGetString(chanObjPtr), Tcl_PosixError(interp))); } - /* - resultDictPtr = Tcl_NewDictObj(); - Tcl_DictObjPut(NULL, resultDictPtr, Tcl_NewStringObj("read", -1) - , resultPtr); - returnOptsPtr = Tcl_NewDictObj(); - Tcl_DictObjPut(NULL, returnOptsPtr, Tcl_NewStringObj("-result", -1) - , resultDictPtr); TclChannelRelease(chan); Tcl_DecrRefCount(resultPtr); - Tcl_SetReturnOptions(interp, returnOptsPtr); - */ return TCL_ERROR; } diff --git a/tests/io.test b/tests/io.test index 0f47a8e..4578a93 100644 --- a/tests/io.test +++ b/tests/io.test @@ -1547,53 +1547,19 @@ test io-12.8 {ReadChars: multibyte chars split} { close $f scan [string index $in end] %c } 160 - - -apply [list {} { - set template { - test io-12.9.@variant@ {ReadChars: multibyte chars split, default (strict)} -body { - set res {} - set f [open $path(test1) w] - fconfigure $f -translation binary - puts -nonewline $f [string repeat a 9]\xC2 - close $f - set f [open $path(test1)] - fconfigure $f -encoding utf-8 @strict@ -buffersize 10 - set status [catch {read $f} cres copts] - #set in [dict get $copts -result] - #lappend res $in - lappend res $status $cres - set status [catch {read $f} cres copts] - #set in [dict get $copts -result] - #lappend res $in - lappend res $status $cres - set res - } -cleanup { - catch {close $f} - } -match glob\ - } - - #append template {\ - # -result {{read aaaaaaaaa} 1\ - # {error reading "*": illegal byte sequence}\ - # {read {}} 1 {error reading "*": illegal byte sequence}} - #} - - append template {\ - -result {1\ - {error reading "*": illegal byte sequence}\ - 1 {error reading "*": illegal byte sequence}} - } - - # strict encoding may be the default in Tcl 9, but in 8 it is not - foreach variant {encodingstrict} strict {{-encodingprofile strict}} { - set script [string map [ - list @variant@ $variant @strict@ $strict] $template] - uplevel 1 $script - } -} [namespace current]] - - +test io-12.9 {ReadChars: multibyte chars split} -body { + set f [open $path(test1) w] + fconfigure $f -translation binary + puts -nonewline $f [string repeat a 9]\xC2 + close $f + set f [open $path(test1)] + fconfigure $f -encoding utf-8 -buffersize 10 + set in [read $f] + close $f + scan [string index $in end] %c +} -cleanup { + catch {close $f} +} -result 194 test io-12.10 {ReadChars: multibyte chars split} -body { set f [open $path(test1) w] fconfigure $f -translation binary @@ -9177,136 +9143,68 @@ test io-75.5 {invalid utf-8 encoding read is ignored (-encodingprofile tcl8)} -s removeFile io-75.5 } -result 4181 +test io-75.6 {invalid utf-8 encoding read is not ignored (-strictencoding 1)} -setup { + set fn [makeFile {} io-75.6] + set f [open $fn w+] + fconfigure $f -encoding binary + # \x81 is invalid in utf-8 + puts -nonewline $f A\x81 + flush $f + seek $f 0 + fconfigure $f -encoding utf-8 -buffering none -eofchar "" -translation lf -strictencoding 1 +} -body { + set d [read $f] + binary scan $d H* hd + lappend hd [catch {read $f} msg] + close $f + lappend hd $msg +} -cleanup { + removeFile io-75.6 +} -match glob -result {41 1 {error reading "*": illegal byte sequence}} -apply [list {} { - - - set test { - test io-75.6 {invalid utf-8 encoding read is not ignored (-encodingprofile strict)} -setup { - set hd {} - set fn [makeFile {} io-75.6] - set f [open $fn w+] - fconfigure $f -encoding binary - # \x81 is invalid in utf-8 - puts -nonewline $f A\x81 - flush $f - seek $f 0 - fconfigure $f -encoding utf-8 -buffering none -eofchar "" -translation lf -encodingprofile strict - } -body { - set status [catch {read $f} cres copts] - #set d [dict get $copts -result read] - #binary scan $d H* hd - lappend hd $status $cres - } -cleanup { - close $f - removeFile io-75.6 - } -match glob\ - } - - #append test {\ - # -result {41 1 {error reading "*": illegal byte sequence}} - #} - - append test {\ - -result {1 {error reading "*": illegal byte sequence}} - } - - uplevel 1 $test - - set test { - test io-75.7 {invalid utf-8 encoding eof handling (-encodingprofile strict)} -setup { - set hd {} - set fn [makeFile {} io-75.7] - set f [open $fn w+] - fconfigure $f -encoding binary - # \xA1 is invalid in utf-8. -eofchar is not detected, because it comes later. - puts -nonewline $f A\xA1\x1A - flush $f - seek $f 0 - fconfigure $f -encoding utf-8 -buffering none -eofchar \x1A -translation lf -encodingprofile strict - } -body { - set status [catch {read $f} cres copts] - #set d [dict get $copts -result read] - #binary scan $d H* hd - lappend hd [eof $f] - lappend hd $status - lappend hd $cres - fconfigure $f -encoding iso8859-1 - lappend hd [read $f];# We changed encoding, so now we can read the \xA1 - close $f - set hd - } -cleanup { - removeFile io-75.7 - } -match glob\ - } - - #append test {\ - # -result {41 0 1 {error reading "*": illegal byte sequence} ¡} - #} - - append test {\ - -result {0 1 {error reading "*": illegal byte sequence} ¡} - } - - uplevel 1 $test - - -} [namespace current]] - - - -test io-75.8.incomplete { - incomplete uft-8 char after eof char is not an error (-encodingprofile strict) -} -setup { - set hd {} - set fn [makeFile {} io-75.8] +test io-75.7 {invalid utf-8 encoding eof handling (-strictencoding 1)} -setup { + set fn [makeFile {} io-75.7] set f [open $fn w+] fconfigure $f -encoding binary - # \x81 is invalid and also incomplete utf-8 data, but because the eof - # character \x1A appears first, it's not an error. - puts -nonewline $f A\x1A\x81 + # \xA1 is invalid in utf-8. -eofchar is not detected, because it comes later. + puts -nonewline $f A\xA1\x1A flush $f seek $f 0 - fconfigure $f -encoding utf-8 -buffering none -eofchar \x1A -translation lf -encodingprofile strict + fconfigure $f -encoding utf-8 -buffering none -eofchar \x1A -translation lf -strictencoding 1 } -body { set d [read $f] binary scan $d H* hd lappend hd [eof $f] - # there should be no error on additional reads - lappend hd [read $f] + lappend hd [catch {read $f} msg] + lappend hd $msg + fconfigure $f -encoding iso8859-1 + lappend hd [read $f];# We changed encoding, so now we can read the \xA1 close $f set hd } -cleanup { - removeFile io-75.8 -} -result {41 1 {}} + removeFile io-75.7 +} -match glob -result {41 0 1 {error reading "*": illegal byte sequence} ¡} - -test io-75.8.invalid {invalid utf-8 after eof char is not an error (-encodingprofile strict)} -setup { - set res {} +test io-75.8 {invalid utf-8 encoding eof handling (-strictencoding 1)} -setup { set fn [makeFile {} io-75.8] set f [open $fn w+] fconfigure $f -encoding binary - # \xc0\x80 is invalid utf-8 data, but because the eof character \x1A - # appears first, it's not an error. - puts -nonewline $f A\x1a\xc0\x80 + # \x81 is invalid in utf-8, but since \x1A comes first, -eofchar takes precedence. + puts -nonewline $f A\x1A\x81 flush $f seek $f 0 fconfigure $f -encoding utf-8 -buffering none -eofchar \x1A -translation lf -encodingprofile strict } -body { set d [read $f] - foreach char [split $d {}] { - lappend res [format %x [scan $char %c]] - } - lappend res [eof $f] - # there should be no error on additional reads - lappend res [read $f] + binary scan $d H* hd + lappend hd [eof $f] + lappend hd [read $f] close $f - set res + set hd } -cleanup { removeFile io-75.8 } -result {41 1 {}} - test io-75.9 {unrepresentable character write passes and is replaced by ?} -setup { set fn [makeFile {} io-75.9] set f [open $fn w+] @@ -9321,7 +9219,9 @@ test io-75.9 {unrepresentable character write passes and is replaced by ?} -setu removeFile io-75.9 } -match glob -result [list {A} {error writing "*": illegal byte sequence}] - +# Incomplete sequence test. +# This error may IMHO only be detected with the close. +# But the read already returns the incomplete sequence. test io-75.10 {incomplete multibyte encoding read is ignored} -setup { set fn [makeFile {} io-75.10] set f [open $fn w+] @@ -9329,7 +9229,7 @@ test io-75.10 {incomplete multibyte encoding read is ignored} -setup { puts -nonewline $f A\xC0 flush $f seek $f 0 - fconfigure $f -encoding utf-8 -encodingprofile tcl8 -buffering none + fconfigure $f -encoding utf-8 -buffering none } -body { set d [read $f] close $f @@ -9338,135 +9238,39 @@ test io-75.10 {incomplete multibyte encoding read is ignored} -setup { } -cleanup { removeFile io-75.10 } -result 41c0 +# The current result returns the orphan byte as byte. +# This may be expected due to special utf-8 handling. +# As utf-8 has a special treatment in multi-byte decoding, also test another +# one. +test io-75.11 {shiftjis encoding error read results in raw bytes} -setup { + set fn [makeFile {} io-75.11] + set f [open $fn w+] + fconfigure $f -encoding binary + # In shiftjis, \x81 starts a two-byte sequence. + # But 2nd byte \xFF is not allowed + puts -nonewline $f A\x81\xFFA + flush $f + seek $f 0 + fconfigure $f -encoding shiftjis -buffering none -eofchar "" -translation lf -strictencoding 1 +} -body { + set d [read $f] + binary scan $d H* hd + lappend hd [catch {set d [read $f]} msg] + lappend hd $msg +} -cleanup { + close $f + removeFile io-75.11 +} -match glob -result {41 1 {error reading "*": illegal byte sequence}} -apply [list {} { - - set test { - test io-75.10_strict {incomplete multibyte encoding read is an error} -setup { - set res {} - set fn [makeFile {} io-75.10] - set f [open $fn w+] - fconfigure $f -encoding binary - puts -nonewline $f A\xC0 - flush $f - seek $f 0 - fconfigure $f -encoding utf-8 -encodingprofile strict -buffering none - } -body { - set status [catch {read $f} cres copts] - - #set d [dict get $copts -result read] - #binary scan $d H* hd - #lappend res $hd $cres - lappend res $cres - - chan configure $f -encoding iso8859-1 - - set d [read $f] - binary scan $d H* hd - lappend res $hd - close $f - return $res - } -cleanup { - removeFile io-75.10 - } -match glob\ - } - - #append test {\ - # -result {41 {error reading "*": illegal byte sequence} c0} - #} - - append test {\ - -result {{error reading "*": illegal byte sequence} c0} - } - - uplevel 1 $test - - - - set test { - # As utf-8 has a special treatment in multi-byte decoding, also test another - # one. - test io-75.11 {shiftjis encoding error read results in raw bytes} -setup { - set hd {} - set fn [makeFile {} io-75.11] - set f [open $fn w+] - fconfigure $f -encoding binary - # In shiftjis, \x81 starts a two-byte sequence. - # But 2nd byte \xFF is not allowed - puts -nonewline $f A\x81\xFFA - flush $f - seek $f 0 - fconfigure $f -encoding shiftjis -buffering none -eofchar "" \ - -translation lf -encodingprofile strict - } -body { - set status [catch {read $f} cres copts] - #set d [dict get $copts -result read] - #binary scan $d H* hd - lappend hd $status - lappend hd $cres - } -cleanup { - close $f - removeFile io-75.11 - } -match glob - } - - #append test {\ - # -result {41 1 {error reading "*": illegal byte sequence}} - #} - - append test {\ - -result {1 {error reading "*": illegal byte sequence}} - } - - - set test { - test io-75.12 {invalid utf-8 encoding read is an error} -setup { - set hd {} - set res {} - set fn [makeFile {} io-75.12] - set f [open $fn w+] - fconfigure $f -encoding binary - puts -nonewline $f A\x81 - flush $f - seek $f 0 - fconfigure $f -encoding utf-8 -buffering none -eofchar {} -translation lf \ - -encodingprofile strict - } -body { - set status [catch {read $f} cres copts] - #set d [dict get $copts -result read] - #binary scan $d H* hd - #lappend res $hd - lappend res $status $cres - return $res - } -cleanup { - catch {close $f} - removeFile io-75.12 - } -match glob\ - } - - #append test {\ - # -result {41 1 {error reading "*": illegal byte sequence}} - #} - - - append test {\ - -result {1 {error reading "*": illegal byte sequence}} - } - - uplevel 1 $test -} [namespace current]] - - -test io-75.12_ignore {invalid utf-8 encoding read is ignored} -setup { +test io-75.12 {invalid utf-8 encoding read is ignored} -setup { set fn [makeFile {} io-75.12] set f [open $fn w+] fconfigure $f -encoding binary puts -nonewline $f A\x81 flush $f seek $f 0 - fconfigure $f -encoding utf-8 -buffering none -eofchar {} \ - -translation lf -encodingprofile tcl8 + fconfigure $f -encoding utf-8 -buffering none -eofchar "" -translation lf } -body { set d [read $f] close $f @@ -9475,121 +9279,27 @@ test io-75.12_ignore {invalid utf-8 encoding read is ignored} -setup { } -cleanup { removeFile io-75.12 } -result 4181 - - -apply [list {} { - - set test { - test io-75.13 {invalid utf-8 encoding read is not ignored (-encodingprofile strict)} -setup { - set hd {} - set fn [makeFile {} io-75.13] - set f [open $fn w+] - fconfigure $f -encoding binary - # \x81 is invalid in utf-8 - puts -nonewline $f A\x81 - flush $f - seek $f 0 - fconfigure $f -encoding utf-8 -buffering none -eofchar "" \ - -translation lf -encodingprofile strict - } -body { - set status [catch {read $f} cres copts] - #set d [dict get $copts -result read] - #binary scan $d H* hd - lappend hd $status - lappend hd $cres - } -cleanup { - catch {close $f} - removeFile io-75.13 - } -match glob\ - } - - #append test {\ - # -result {41 1 {error reading "*": illegal byte sequence}} - #} - - append test {\ - -result {1 {error reading "*": illegal byte sequence}} - } - - uplevel 1 $test - - set test { - } - -} [namespace current]] - - -test io-75.14 { - invalid utf-8 encoding [gets] continues in non-strict mode after error -} -setup { - set res {} - set fn [makeFile {} io-75.14] +test io-75.13 {invalid utf-8 encoding read is not ignored (-strictencoding 1)} -setup { + set fn [makeFile {} io-75.13] set f [open $fn w+] - fconfigure $f -translation binary - # \xc0 is invalid in utf-8 - puts -nonewline $f a\nb\xc0\nc\n + fconfigure $f -encoding binary + # \x81 is invalid in utf-8 + puts -nonewline $f "A\x81" flush $f seek $f 0 - fconfigure $f -encoding utf-8 -buffering none -eofchar {} -translation lf -encodingprofile strict + fconfigure $f -encoding utf-8 -buffering none -eofchar "" -translation lf -strictencoding 1 } -body { - lappend res [gets $f] - set status [catch {gets $f} cres copts] - lappend res $status $cres - chan configure $f -encodingprofile tcl8 - lappend res [gets $f] - lappend res [gets $f] - close $f - return $res + set d [read $f] + binary scan $d H* hd + lappend hd [catch {read $f} msg] + close $f + lappend hd $msg } -cleanup { - removeFile io-75.14 -} -match glob -result {a 1 {error reading "*": illegal byte sequence} bÀ c} - - - -apply [list {} { - set test { - test io-75.15 {invalid utf-8 encoding strict gets should not hang} -setup { - set res {} - set fn [makeFile {} io-75.15] - set chan [open $fn w+] - fconfigure $chan -encoding binary - # This is not valid UTF-8 - puts $chan hello\nAB\xc0\x40CD\nEFG - close $chan - } -body { - #Now try to read it with [gets] - set chan [open $fn] - fconfigure $chan -encoding utf-8 -encodingprofile strict - lappend res [gets $chan] - set status [catch {gets $chan} cres copts] - lappend res $status $cres - set status [catch {gets $chan} cres copts] - lappend res $status $cres - #lappend res [dict get $copts -result] - chan configur $chan -encoding binary - foreach char [split [read $chan 2] {}] { - lappend res [format %x [scan $char %c]] - } - return $res - } -cleanup { - close $chan - removeFile io-75.15 - } -match glob\ - } + removeFile io-75.13 +} -match glob -result {41 1 {error reading "*": illegal byte sequence}} - #append test {\ - # -result {hello 1 {error reading "*": illegal byte sequence}\ - # 1 {error reading "*": illegal byte sequence} {read AB} c0 40} - #} - - append test {\ - -result {hello 1 {error reading "*": illegal byte sequence}\ - 1 {error reading "*": illegal byte sequence} c0 40} - } - - uplevel 1 $test +# ### ### ### ######### ######### ######### -} [namespace current]] test io-76.0 {channel modes} -setup { -- cgit v0.12 From 6caf48437905145c68bd35e5c12819a86540b235 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Thu, 23 Feb 2023 21:31:04 +0000 Subject: -strictencoding 1 -> -encodingprofile strict (since the testcases placed back in previous commit didn't have that yet) --- tests/io.test | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/io.test b/tests/io.test index 4578a93..a8f7bc7 100644 --- a/tests/io.test +++ b/tests/io.test @@ -9143,7 +9143,7 @@ test io-75.5 {invalid utf-8 encoding read is ignored (-encodingprofile tcl8)} -s removeFile io-75.5 } -result 4181 -test io-75.6 {invalid utf-8 encoding read is not ignored (-strictencoding 1)} -setup { +test io-75.6 {invalid utf-8 encoding read is not ignored (-encodingprofile strict)} -setup { set fn [makeFile {} io-75.6] set f [open $fn w+] fconfigure $f -encoding binary @@ -9151,7 +9151,7 @@ test io-75.6 {invalid utf-8 encoding read is not ignored (-strictencoding 1)} -s puts -nonewline $f A\x81 flush $f seek $f 0 - fconfigure $f -encoding utf-8 -buffering none -eofchar "" -translation lf -strictencoding 1 + fconfigure $f -encoding utf-8 -buffering none -eofchar "" -translation lf -encodingprofile strict } -body { set d [read $f] binary scan $d H* hd @@ -9162,7 +9162,7 @@ test io-75.6 {invalid utf-8 encoding read is not ignored (-strictencoding 1)} -s removeFile io-75.6 } -match glob -result {41 1 {error reading "*": illegal byte sequence}} -test io-75.7 {invalid utf-8 encoding eof handling (-strictencoding 1)} -setup { +test io-75.7 {invalid utf-8 encoding eof handling (-encodingprofile strict)} -setup { set fn [makeFile {} io-75.7] set f [open $fn w+] fconfigure $f -encoding binary @@ -9170,7 +9170,7 @@ test io-75.7 {invalid utf-8 encoding eof handling (-strictencoding 1)} -setup { puts -nonewline $f A\xA1\x1A flush $f seek $f 0 - fconfigure $f -encoding utf-8 -buffering none -eofchar \x1A -translation lf -strictencoding 1 + fconfigure $f -encoding utf-8 -buffering none -eofchar \x1A -translation lf -encodingprofile strict } -body { set d [read $f] binary scan $d H* hd @@ -9185,7 +9185,7 @@ test io-75.7 {invalid utf-8 encoding eof handling (-strictencoding 1)} -setup { removeFile io-75.7 } -match glob -result {41 0 1 {error reading "*": illegal byte sequence} ¡} -test io-75.8 {invalid utf-8 encoding eof handling (-strictencoding 1)} -setup { +test io-75.8 {invalid utf-8 encoding eof handling (-encodingprofile strict)} -setup { set fn [makeFile {} io-75.8] set f [open $fn w+] fconfigure $f -encoding binary @@ -9252,7 +9252,7 @@ test io-75.11 {shiftjis encoding error read results in raw bytes} -setup { puts -nonewline $f A\x81\xFFA flush $f seek $f 0 - fconfigure $f -encoding shiftjis -buffering none -eofchar "" -translation lf -strictencoding 1 + fconfigure $f -encoding shiftjis -buffering none -eofchar "" -translation lf -encodingprofile strict } -body { set d [read $f] binary scan $d H* hd @@ -9279,7 +9279,7 @@ test io-75.12 {invalid utf-8 encoding read is ignored} -setup { } -cleanup { removeFile io-75.12 } -result 4181 -test io-75.13 {invalid utf-8 encoding read is not ignored (-strictencoding 1)} -setup { +test io-75.13 {invalid utf-8 encoding read is not ignored (-encodingprofile strict)} -setup { set fn [makeFile {} io-75.13] set f [open $fn w+] fconfigure $f -encoding binary @@ -9287,7 +9287,7 @@ test io-75.13 {invalid utf-8 encoding read is not ignored (-strictencoding 1)} - puts -nonewline $f "A\x81" flush $f seek $f 0 - fconfigure $f -encoding utf-8 -buffering none -eofchar "" -translation lf -strictencoding 1 + fconfigure $f -encoding utf-8 -buffering none -eofchar "" -translation lf -encodingprofile strict } -body { set d [read $f] binary scan $d H* hd -- cgit v0.12 From 485bc2fd887abb2501321c670e66c849da1b026c Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Fri, 24 Feb 2023 03:35:31 +0000 Subject: Bug [40c61a5d10]. Fix syntax error message. --- generic/tclCmdAH.c | 11 ++++++----- tests/cmdAH.test | 4 ++-- tests/safe.test | 8 ++++---- 3 files changed, 12 insertions(+), 11 deletions(-) diff --git a/generic/tclCmdAH.c b/generic/tclCmdAH.c index 93c3416..19a5bc3 100644 --- a/generic/tclCmdAH.c +++ b/generic/tclCmdAH.c @@ -585,11 +585,12 @@ EncodingConvertParseOptions ( if (objc == 1) { numArgsError: /* ONLY jump here if nothing needs to be freed!!! */ - Tcl_WrongNumArgs( - interp, - 1, - objv, - "? ?-profile profile? ?-failindex var? encoding ? data"); + Tcl_WrongNumArgs(interp, + 1, + objv, + "?-profile profile? ?-failindex var? encoding data"); + ((Interp *)interp)->flags |= INTERP_ALTERNATE_WRONG_ARGS; + Tcl_WrongNumArgs(interp, 1, objv, "data"); return TCL_ERROR; } diff --git a/tests/cmdAH.test b/tests/cmdAH.test index 471d46a..ba78c23 100644 --- a/tests/cmdAH.test +++ b/tests/cmdAH.test @@ -175,8 +175,8 @@ test cmdAH-3.2 {Tcl_ContinueObjCmd, success} { # encoding command set "numargErrors(encoding system)" {^wrong # args: should be "(encoding |::tcl::encoding::)system \?encoding\?"$} -set "numargErrors(encoding convertfrom)" {^wrong # args: should be "(encoding |::tcl::encoding::)convertfrom \? \?-profile profile\? \?-failindex var\? encoding \? data"$} -set "numargErrors(encoding convertto)" {^wrong # args: should be "(encoding |::tcl::encoding::)convertto \? \?-profile profile\? \?-failindex var\? encoding \? data"$} +set "numargErrors(encoding convertfrom)" {wrong # args: should be "(encoding |::tcl::encoding::)convertfrom \?-profile profile\? \?-failindex var\? encoding data" or "(encoding |::tcl::encoding::)convertfrom data"} +set "numargErrors(encoding convertto)" {wrong # args: should be "(encoding |::tcl::encoding::)convertto \?-profile profile\? \?-failindex var\? encoding data" or "(encoding |::tcl::encoding::)convertto data"} set "numargErrors(encoding names)" {wrong # args: should be "encoding names"} set "numargErrors(encoding profiles)" {wrong # args: should be "encoding profiles"} diff --git a/tests/safe.test b/tests/safe.test index 8c8382a..f3890b7 100644 --- a/tests/safe.test +++ b/tests/safe.test @@ -1473,7 +1473,7 @@ test safe-11.7 {testing safe encoding} -setup { interp eval $i encoding convertfrom } -returnCodes error -cleanup { safe::interpDelete $i -} -result {wrong # args: should be "encoding convertfrom ??-profile profile? ?-failindex var? ?encoding?? data"} +} -result {wrong # args: should be "encoding convertfrom ?-profile profile? ?-failindex var? encoding data" or "encoding convertfrom data"} test safe-11.7.1 {testing safe encoding} -setup { set i [safe::interpCreate] } -body { @@ -1482,7 +1482,7 @@ test safe-11.7.1 {testing safe encoding} -setup { } -match glob -cleanup { unset -nocomplain m o safe::interpDelete $i -} -result {wrong # args: should be "encoding convertfrom ??-profile profile? ?-failindex var? ?encoding?? data" +} -result {wrong # args: should be "encoding convertfrom ?-profile profile? ?-failindex var? encoding data" or "encoding convertfrom data" while executing "encoding convertfrom" invoked from within @@ -1495,7 +1495,7 @@ test safe-11.8 {testing safe encoding} -setup { interp eval $i encoding convertto } -returnCodes error -cleanup { safe::interpDelete $i -} -result {wrong # args: should be "encoding convertto ??-profile profile? ?-failindex var? ?encoding?? data"} +} -result {wrong # args: should be "encoding convertto ?-profile profile? ?-failindex var? encoding data" or "encoding convertto data"} test safe-11.8.1 {testing safe encoding} -setup { set i [safe::interpCreate] } -body { @@ -1504,7 +1504,7 @@ test safe-11.8.1 {testing safe encoding} -setup { } -match glob -cleanup { unset -nocomplain m o safe::interpDelete $i -} -result {wrong # args: should be "encoding convertto ??-profile profile? ?-failindex var? ?encoding?? data" +} -result {wrong # args: should be "encoding convertto ?-profile profile? ?-failindex var? encoding data" or "encoding convertto data" while executing "encoding convertto" invoked from within -- cgit v0.12 From 1c3c25097b1f63d6b1a0446c2c441833c4ecec11 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Fri, 24 Feb 2023 08:25:27 +0000 Subject: int -> Tcl_Size in tclEncoding.c (making the diff between Tcl 8.7 and 9.0 smaller) --- generic/tclEncoding.c | 54 +++++++++++++++++++++++++-------------------------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 8e13b43..f32baac 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -36,7 +36,7 @@ typedef struct { * encoding is deleted. */ void *clientData; /* Arbitrary value associated with encoding * type. Passed to conversion functions. */ - int nullSize; /* Number of 0x00 bytes that signify + Tcl_Size nullSize; /* Number of 0x00 bytes that signify * end-of-string in this encoding. This number * is used to determine the source string * length when the srcLen argument is @@ -374,7 +374,7 @@ int Tcl_SetEncodingSearchPath( Tcl_Obj *searchPath) { - int dummy; + Tcl_Size dummy; if (TCL_ERROR == TclListObjLengthM(NULL, searchPath, &dummy)) { return TCL_ERROR; @@ -421,7 +421,7 @@ void TclSetLibraryPath( Tcl_Obj *path) { - int dummy; + Tcl_Size dummy; if (TCL_ERROR == TclListObjLengthM(NULL, path, &dummy)) { return; @@ -457,7 +457,7 @@ TclSetLibraryPath( static void FillEncodingFileMap(void) { - int i, numDirs = 0; + Tcl_Size i, numDirs = 0; Tcl_Obj *map, *searchPath; searchPath = Tcl_GetEncodingSearchPath(); @@ -472,7 +472,7 @@ FillEncodingFileMap(void) * entries found, we favor files earlier on the search path. */ - int j, numFiles; + Tcl_Size j, numFiles; Tcl_Obj *directory, *matchFileList; Tcl_Obj **filev; Tcl_GlobTypeData readableFiles = { @@ -1005,7 +1005,7 @@ Tcl_GetEncodingNames( * *--------------------------------------------------------------------------- */ -int +Tcl_Size Tcl_GetEncodingNulLength( Tcl_Encoding encoding) { @@ -1171,7 +1171,7 @@ Tcl_ExternalToUtfDString( Tcl_Encoding encoding, /* The encoding for the source string, or NULL * for the default system encoding. */ const char *src, /* Source string in specified encoding. */ - int srcLen, /* Source string length in bytes, or < 0 for + Tcl_Size srcLen, /* Source string length in bytes, or < 0 for * encoding-specific string length. */ Tcl_DString *dstPtr) /* Uninitialized or free DString in which the * converted string is stored. */ @@ -1210,12 +1210,12 @@ Tcl_ExternalToUtfDString( *------------------------------------------------------------------------- */ -int +Tcl_Size Tcl_ExternalToUtfDStringEx( Tcl_Encoding encoding, /* The encoding for the source string, or NULL * for the default system encoding. */ const char *src, /* Source string in specified encoding. */ - int srcLen, /* Source string length in bytes, or < 0 for + Tcl_Size srcLen, /* Source string length in bytes, or < 0 for * encoding-specific string length. */ int flags, /* Conversion control flags. */ Tcl_DString *dstPtr) /* Uninitialized or free DString in which the @@ -1224,7 +1224,8 @@ Tcl_ExternalToUtfDStringEx( char *dst; Tcl_EncodingState state; const Encoding *encodingPtr; - int dstLen, result, soFar, srcRead, dstWrote, dstChars; + int result, soFar, srcRead, dstWrote, dstChars; + Tcl_Size dstLen; const char *srcStart = src; Tcl_DStringInit(dstPtr); @@ -1255,7 +1256,7 @@ Tcl_ExternalToUtfDStringEx( src += srcRead; if (result != TCL_CONVERT_NOSPACE) { Tcl_DStringSetLength(dstPtr, soFar); - return (result == TCL_OK) ? TCL_INDEX_NONE : (int)(src - srcStart); + return (result == TCL_OK) ? TCL_INDEX_NONE : (Tcl_Size)(src - srcStart); } flags &= ~TCL_ENCODING_START; srcLen -= srcRead; @@ -1292,7 +1293,7 @@ Tcl_ExternalToUtf( Tcl_Encoding encoding, /* The encoding for the source string, or NULL * for the default system encoding. */ const char *src, /* Source string in specified encoding. */ - int srcLen, /* Source string length in bytes, or < 0 for + Tcl_Size srcLen, /* Source string length in bytes, or < 0 for * encoding-specific string length. */ int flags, /* Conversion control flags. */ Tcl_EncodingState *statePtr,/* Place for conversion routine to store state @@ -1302,7 +1303,7 @@ Tcl_ExternalToUtf( * routine under control of flags argument. */ char *dst, /* Output buffer in which converted string is * stored. */ - int dstLen, /* The maximum length of output buffer in + Tcl_Size dstLen, /* The maximum length of output buffer in * bytes. */ int *srcReadPtr, /* Filled with the number of bytes from the * source string that were converted. This may @@ -1409,7 +1410,7 @@ Tcl_UtfToExternalDString( Tcl_Encoding encoding, /* The encoding for the converted string, or * NULL for the default system encoding. */ const char *src, /* Source string in UTF-8. */ - int srcLen, /* Source string length in bytes, or < 0 for + Tcl_Size srcLen, /* Source string length in bytes, or < 0 for * strlen(). */ Tcl_DString *dstPtr) /* Uninitialized or free DString in which the * converted string is stored. */ @@ -1449,12 +1450,12 @@ Tcl_UtfToExternalDString( *------------------------------------------------------------------------- */ -int +Tcl_Size Tcl_UtfToExternalDStringEx( Tcl_Encoding encoding, /* The encoding for the converted string, or * NULL for the default system encoding. */ const char *src, /* Source string in UTF-8. */ - int srcLen, /* Source string length in bytes, or < 0 for + Tcl_Size srcLen, /* Source string length in bytes, or < 0 for * strlen(). */ int flags, /* Conversion control flags. */ Tcl_DString *dstPtr) /* Uninitialized or free DString in which the @@ -1465,7 +1466,7 @@ Tcl_UtfToExternalDStringEx( const Encoding *encodingPtr; int result, soFar, srcRead, dstWrote, dstChars; const char *srcStart = src; - int dstLen; + Tcl_Size dstLen; Tcl_DStringInit(dstPtr); dst = Tcl_DStringValue(dstPtr); @@ -1494,7 +1495,7 @@ Tcl_UtfToExternalDStringEx( while (i >= soFar) { Tcl_DStringSetLength(dstPtr, i--); } - return (result == TCL_OK) ? TCL_INDEX_NONE : (int)(src - srcStart); + return (result == TCL_OK) ? TCL_INDEX_NONE : (Tcl_Size)(src - srcStart); } flags &= ~TCL_ENCODING_START; @@ -1532,7 +1533,7 @@ Tcl_UtfToExternal( Tcl_Encoding encoding, /* The encoding for the converted string, or * NULL for the default system encoding. */ const char *src, /* Source string in UTF-8. */ - int srcLen, /* Source string length in bytes, or < 0 for + Tcl_Size srcLen, /* Source string length in bytes, or < 0 for * strlen(). */ int flags, /* Conversion control flags. */ Tcl_EncodingState *statePtr,/* Place for conversion routine to store state @@ -1542,7 +1543,7 @@ Tcl_UtfToExternal( * routine under control of flags argument. */ char *dst, /* Output buffer in which converted string * is stored. */ - int dstLen, /* The maximum length of output buffer in + Tcl_Size dstLen, /* The maximum length of output buffer in * bytes. */ int *srcReadPtr, /* Filled with the number of bytes from the * source string that were converted. This may @@ -1653,7 +1654,7 @@ OpenEncodingFileChannel( Tcl_Obj *map = TclGetProcessGlobalValue(&encodingFileMap); Tcl_Obj **dir, *path, *directory = NULL; Tcl_Channel chan = NULL; - int i, numDirs; + Tcl_Size i, numDirs; TclListObjGetElementsM(NULL, searchPath, &numDirs, &dir); Tcl_IncrRefCount(nameObj); @@ -1918,7 +1919,7 @@ LoadTableEncoding( for (i = 0; i < numPages; i++) { int ch; const char *p; - int expected = 3 + 16 * (16 * 4 + 1); + Tcl_Size expected = 3 + 16 * (16 * 4 + 1); if (Tcl_ReadChars(chan, objPtr, expected, 0) != expected) { return NULL; @@ -2154,7 +2155,7 @@ LoadEscapeEncoding( Tcl_DStringInit(&escapeData); while (1) { - int argc; + Tcl_Size argc; const char **argv; char *line; Tcl_DString lineString; @@ -3919,8 +3920,7 @@ EscapeFromUtfProc( result = TCL_CONVERT_NOSPACE; break; } - memcpy(dst, subTablePtr->sequence, - subTablePtr->sequenceLen); + memcpy(dst, subTablePtr->sequence, subTablePtr->sequenceLen); dst += subTablePtr->sequenceLen; } } @@ -4138,11 +4138,11 @@ unilen4( static void InitializeEncodingSearchPath( char **valuePtr, - unsigned int *lengthPtr, + TCL_HASH_TYPE *lengthPtr, Tcl_Encoding *encodingPtr) { const char *bytes; - int i, numDirs, numBytes; + Tcl_Size i, numDirs, numBytes; Tcl_Obj *libPathObj, *encodingObj, *searchPathObj; TclNewLiteralStringObj(encodingObj, "encoding"); -- cgit v0.12 From 854369a67c1719356d036c3fe11e052a7fe62e80 Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Fri, 24 Feb 2023 09:35:09 +0000 Subject: Factor out encoding test vectors into separate file so they can be used for file IO tests --- tests/cmdAH.test | 634 +------------------------------------------- tests/encodingVectors.tcl | 655 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 656 insertions(+), 633 deletions(-) create mode 100644 tests/encodingVectors.tcl diff --git a/tests/cmdAH.test b/tests/cmdAH.test index ba78c23..cec93d2 100644 --- a/tests/cmdAH.test +++ b/tests/cmdAH.test @@ -180,640 +180,8 @@ set "numargErrors(encoding convertto)" {wrong # args: should be "(encoding |::tc set "numargErrors(encoding names)" {wrong # args: should be "encoding names"} set "numargErrors(encoding profiles)" {wrong # args: should be "encoding profiles"} -set encProfiles {tcl8 strict replace} -set encDefaultProfile tcl8; # Should reflect the default from implementation - -# TODO - valid sequences for different encodings - shiftjis etc. -# Note utf-16, utf-32 missing because they are automatically -# generated based on le/be versions. -lappend encValidStrings {*}{ - ascii \u0000 00 {} {Lowest ASCII} - ascii \u007F 7F knownBug {Highest ASCII} - ascii \u007D 7D {} {Brace - just to verify test scripts are escaped correctly} - ascii \u007B 7B {} {Terminating brace - just to verify test scripts are escaped correctly} - - utf-8 \u0000 00 {} {Unicode Table 3.7 Row 1} - utf-8 \u007F 7F {} {Unicode Table 3.7 Row 1} - utf-8 \u0080 C280 {} {Unicode Table 3.7 Row 2} - utf-8 \u07FF DFBF {} {Unicode Table 3.7 Row 2} - utf-8 \u0800 E0A080 {} {Unicode Table 3.7 Row 3} - utf-8 \u0FFF E0BFBF {} {Unicode Table 3.7 Row 3} - utf-8 \u1000 E18080 {} {Unicode Table 3.7 Row 4} - utf-8 \uCFFF ECBFBF {} {Unicode Table 3.7 Row 4} - utf-8 \uD000 ED8080 {} {Unicode Table 3.7 Row 5} - utf-8 \uD7FF ED9FBF {} {Unicode Table 3.7 Row 5} - utf-8 \uE000 EE8080 {} {Unicode Table 3.7 Row 6} - utf-8 \uFFFF EFBFBF {} {Unicode Table 3.7 Row 6} - utf-8 \U10000 F0908080 {} {Unicode Table 3.7 Row 7} - utf-8 \U3FFFF F0BFBFBF {} {Unicode Table 3.7 Row 7} - utf-8 \U40000 F1808080 {} {Unicode Table 3.7 Row 8} - utf-8 \UFFFFF F3BFBFBF {} {Unicode Table 3.7 Row 8} - utf-8 \U100000 F4808080 {} {Unicode Table 3.7 Row 9} - utf-8 \U10FFFF F48FBFBF {} {Unicode Table 3.7 Row 9} - utf-8 A\u03A9\u8A9E\U00010384 41CEA9E8AA9EF0908E84 {} {Unicode 2.5} - - utf-16le \u0000 0000 {} {Lowest code unit} - utf-16le \uD7FF FFD7 {} {Below high surrogate range} - utf-16le \uE000 00E0 {} {Above low surrogate range} - utf-16le \uFFFF FFFF {} {Highest code unit} - utf-16le \U010000 00D800DC {} {First surrogate pair} - utf-16le \U10FFFF FFDBFFDF {} {First surrogate pair} - utf-16le A\u03A9\u8A9E\U00010384 4100A9039E8A00D884DF {} {Unicode 2.5} - - utf-16be \u0000 0000 {} {Lowest code unit} - utf-16be \uD7FF D7FF {} {Below high surrogate range} - utf-16be \uE000 E000 {} {Above low surrogate range} - utf-16be \uFFFF FFFF {} {Highest code unit} - utf-16be \U010000 D800DC00 {} {First surrogate pair} - utf-16be \U10FFFF DBFFDFFF {} {First surrogate pair} - utf-16be A\u03A9\u8A9E\U00010384 004103A98A9ED800DF84 {} {Unicode 2.5} - - utf-32le \u0000 00000000 {} {Lowest code unit} - utf-32le \uFFFF FFFF0000 {} {Highest BMP} - utf-32le \U010000 00000100 {} {First supplementary} - utf-32le \U10FFFF ffff1000 {} {Last supplementary} - utf-32le A\u03A9\u8A9E\U00010384 41000000A90300009E8A000084030100 {} {Unicode 2.5} - - utf-32be \u0000 00000000 {} {Lowest code unit} - utf-32be \uFFFF 0000FFFF {} {Highest BMP} - utf-32be \U010000 00010000 {} {First supplementary} - utf-32be \U10FFFF 0010FFFF {} {Last supplementary} - utf-32be A\u03A9\u8A9E\U00010384 00000041000003A900008A9E00010384 {} {Unicode 2.5} -} - -# Invalid byte sequences. These are driven from a table with format -# {encoding bytes profile expectedresult expectedfailindex ctrl comment} -# -# should be unique for test ids to be unique. Note utf-16, -# utf-32 missing because they are automatically generated based on le/be -# versions. Each entry potentially results in generation of multiple tests. -# This is controlled by the ctrl field. This should be a list of -# zero or more of the following: -# solo - the test data is the string itself -# lead - the test data is the string followed by a valid suffix -# tail - the test data is the string preceded by a prefix -# middle - the test data is the string wrapped by a prefix and suffix -# If the ctrl field is empty it is treated as all of the above -# Note if there is any other value by itself, it will cause the test to -# be skipped. This is intentional to skip known bugs. -# TODO - non-UTF encodings - -# ascii - Any byte above 127 is invalid and is mapped -# to the same numeric code point except for the range -# 80-9F which is treated as cp1252. -# This tests the TableToUtfProc code path. -lappend encInvalidBytes {*}{ - ascii 80 tcl8 \u20AC -1 {knownBug} {map to cp1252} - ascii 80 replace \uFFFD -1 {} {Smallest invalid byte} - ascii 80 strict {} 0 {} {Smallest invalid byte} - ascii 81 tcl8 \u0081 -1 {knownBug} {map to cp1252} - ascii 82 tcl8 \u201A -1 {knownBug} {map to cp1252} - ascii 83 tcl8 \u0192 -1 {knownBug} {map to cp1252} - ascii 84 tcl8 \u201E -1 {knownBug} {map to cp1252} - ascii 85 tcl8 \u2026 -1 {knownBug} {map to cp1252} - ascii 86 tcl8 \u2020 -1 {knownBug} {map to cp1252} - ascii 87 tcl8 \u2021 -1 {knownBug} {map to cp1252} - ascii 88 tcl8 \u0276 -1 {knownBug} {map to cp1252} - ascii 89 tcl8 \u2030 -1 {knownBug} {map to cp1252} - ascii 8A tcl8 \u0160 -1 {knownBug} {map to cp1252} - ascii 8B tcl8 \u2039 -1 {knownBug} {map to cp1252} - ascii 8C tcl8 \u0152 -1 {knownBug} {map to cp1252} - ascii 8D tcl8 \u008D -1 {knownBug} {map to cp1252} - ascii 8E tcl8 \u017D -1 {knownBug} {map to cp1252} - ascii 8F tcl8 \u008F -1 {knownBug} {map to cp1252} - ascii 90 tcl8 \u0090 -1 {knownBug} {map to cp1252} - ascii 91 tcl8 \u2018 -1 {knownBug} {map to cp1252} - ascii 92 tcl8 \u2019 -1 {knownBug} {map to cp1252} - ascii 93 tcl8 \u201C -1 {knownBug} {map to cp1252} - ascii 94 tcl8 \u201D -1 {knownBug} {map to cp1252} - ascii 95 tcl8 \u2022 -1 {knownBug} {map to cp1252} - ascii 96 tcl8 \u2013 -1 {knownBug} {map to cp1252} - ascii 97 tcl8 \u2014 -1 {knownBug} {map to cp1252} - ascii 98 tcl8 \u02DC -1 {knownBug} {map to cp1252} - ascii 99 tcl8 \u2122 -1 {knownBug} {map to cp1252} - ascii 9A tcl8 \u0161 -1 {knownBug} {map to cp1252} - ascii 9B tcl8 \u203A -1 {knownBug} {map to cp1252} - ascii 9C tcl8 \u0153 -1 {knownBug} {map to cp1252} - ascii 9D tcl8 \u009D -1 {knownBug} {map to cp1252} - ascii 9E tcl8 \u017E -1 {knownBug} {map to cp1252} - ascii 9F tcl8 \u0178 -1 {knownBug} {map to cp1252} - - ascii FF tcl8 \u00FF -1 {} {Largest invalid byte} - ascii FF replace \uFFFD -1 {} {Largest invalid byte} - ascii FF strict {} 0 {} {Largest invalid byte} -} - -# utf-8 - valid sequences based on Table 3.7 in the Unicode -# standard. -# -# Code Points First Second Third Fourth Byte -# U+0000..U+007F 00..7F -# U+0080..U+07FF C2..DF 80..BF -# U+0800..U+0FFF E0 A0..BF 80..BF -# U+1000..U+CFFF E1..EC 80..BF 80..BF -# U+D000..U+D7FF ED 80..9F 80..BF -# U+E000..U+FFFF EE..EF 80..BF 80..BF -# U+10000..U+3FFFF F0 90..BF 80..BF 80..BF -# U+40000..U+FFFFF F1..F3 80..BF 80..BF 80..BF -# U+100000..U+10FFFF F4 80..8F 80..BF 80..BF -# -# Tests below are based on the "gaps" in the above table. Note ascii test -# values are repeated because internally a different code path is used -# (UtfToUtfProc). -# Note C0, C1, F5:FF are invalid bytes ANYWHERE. Exception is C080 -lappend encInvalidBytes {*}{ - utf-8 80 tcl8 \u20AC -1 {} {map to cp1252} - utf-8 80 replace \uFFFD -1 {} {Smallest invalid byte} - utf-8 80 strict {} 0 {} {Smallest invalid byte} - utf-8 81 tcl8 \u0081 -1 {} {map to cp1252} - utf-8 82 tcl8 \u201A -1 {} {map to cp1252} - utf-8 83 tcl8 \u0192 -1 {} {map to cp1252} - utf-8 84 tcl8 \u201E -1 {} {map to cp1252} - utf-8 85 tcl8 \u2026 -1 {} {map to cp1252} - utf-8 86 tcl8 \u2020 -1 {} {map to cp1252} - utf-8 87 tcl8 \u2021 -1 {} {map to cp1252} - utf-8 88 tcl8 \u02C6 -1 {} {map to cp1252} - utf-8 89 tcl8 \u2030 -1 {} {map to cp1252} - utf-8 8A tcl8 \u0160 -1 {} {map to cp1252} - utf-8 8B tcl8 \u2039 -1 {} {map to cp1252} - utf-8 8C tcl8 \u0152 -1 {} {map to cp1252} - utf-8 8D tcl8 \u008D -1 {} {map to cp1252} - utf-8 8E tcl8 \u017D -1 {} {map to cp1252} - utf-8 8F tcl8 \u008F -1 {} {map to cp1252} - utf-8 90 tcl8 \u0090 -1 {} {map to cp1252} - utf-8 91 tcl8 \u2018 -1 {} {map to cp1252} - utf-8 92 tcl8 \u2019 -1 {} {map to cp1252} - utf-8 93 tcl8 \u201C -1 {} {map to cp1252} - utf-8 94 tcl8 \u201D -1 {} {map to cp1252} - utf-8 95 tcl8 \u2022 -1 {} {map to cp1252} - utf-8 96 tcl8 \u2013 -1 {} {map to cp1252} - utf-8 97 tcl8 \u2014 -1 {} {map to cp1252} - utf-8 98 tcl8 \u02DC -1 {} {map to cp1252} - utf-8 99 tcl8 \u2122 -1 {} {map to cp1252} - utf-8 9A tcl8 \u0161 -1 {} {map to cp1252} - utf-8 9B tcl8 \u203A -1 {} {map to cp1252} - utf-8 9C tcl8 \u0153 -1 {} {map to cp1252} - utf-8 9D tcl8 \u009D -1 {} {map to cp1252} - utf-8 9E tcl8 \u017E -1 {} {map to cp1252} - utf-8 9F tcl8 \u0178 -1 {} {map to cp1252} - - utf-8 C0 tcl8 \u00C0 -1 {} {C0 is invalid anywhere} - utf-8 C0 strict {} 0 {} {C0 is invalid anywhere} - utf-8 C0 replace \uFFFD -1 {} {C0 is invalid anywhere} - utf-8 C080 tcl8 \u0000 -1 {} {C080 -> U+0 in Tcl's internal modified UTF8} - utf-8 C080 strict {} 0 {} {C080 -> invalid} - utf-8 C080 replace \uFFFD -1 {} {C080 -> single replacement char} - utf-8 C0A2 tcl8 \u00C0\u00A2 -1 {} {websec.github.io - A} - utf-8 C0A2 replace \uFFFD\uFFFD -1 {} {websec.github.io - A} - utf-8 C0A2 strict {} 0 {} {websec.github.io - A} - utf-8 C0A7 tcl8 \u00C0\u00A7 -1 {} {websec.github.io - double quote} - utf-8 C0A7 replace \uFFFD\uFFFD -1 {} {websec.github.io - double quote} - utf-8 C0A7 strict {} 0 {} {websec.github.io - double quote} - utf-8 C0AE tcl8 \u00C0\u00AE -1 {} {websec.github.io - full stop} - utf-8 C0AE replace \uFFFD\uFFFD -1 {} {websec.github.io - full stop} - utf-8 C0AE strict {} 0 {} {websec.github.io - full stop} - utf-8 C0AF tcl8 \u00C0\u00AF -1 {} {websec.github.io - solidus} - utf-8 C0AF replace \uFFFD\uFFFD -1 {} {websec.github.io - solidus} - utf-8 C0AF strict {} 0 {} {websec.github.io - solidus} - - utf-8 C1 tcl8 \u00C1 -1 {} {C1 is invalid everywhere} - utf-8 C1 replace \uFFFD -1 {} {C1 is invalid everywhere} - utf-8 C1 strict {} 0 {} {C1 is invalid everywhere} - utf-8 C181 tcl8 \u00C1\u0081 -1 {} {websec.github.io - base test (A)} - utf-8 C181 replace \uFFFD\uFFFD -1 {} {websec.github.io - base test (A)} - utf-8 C181 strict {} 0 {} {websec.github.io - base test (A)} - utf-8 C19C tcl8 \u00C1\u0153 -1 {} {websec.github.io - reverse solidus} - utf-8 C19C replace \uFFFD\uFFFD -1 {} {websec.github.io - reverse solidus} - utf-8 C19C strict {} 0 {} {websec.github.io - reverse solidus} - - utf-8 C2 tcl8 \u00C2 -1 {} {Missing trail byte} - utf-8 C2 replace \uFFFD -1 {} {Missing trail byte} - utf-8 C2 strict {} 0 {} {Missing trail byte} - utf-8 C27F tcl8 \u00C2\x7F -1 {} {Trail byte must be 80:BF} - utf-8 C27F replace \uFFFD\x7F -1 {} {Trail byte must be 80:BF} - utf-8 C27F strict {} 0 {} {Trail byte must be 80:BF} - utf-8 DF tcl8 \u00DF -1 {} {Missing trail byte} - utf-8 DF replace \uFFFD -1 {} {Missing trail byte} - utf-8 DF strict {} 0 {} {Missing trail byte} - utf-8 DF7F tcl8 \u00DF\x7F -1 {} {Trail byte must be 80:BF} - utf-8 DF7F replace \uFFFD\x7F -1 {} {Trail byte must be 80:BF} - utf-8 DF7F strict {} 0 {} {Trail byte must be 80:BF} - utf-8 DFE0A080 tcl8 \u00DF\u0800 -1 {} {Invalid trail byte is start of valid sequence} - utf-8 DFE0A080 replace \uFFFD\u0800 -1 {} {Invalid trail byte is start of valid sequence} - utf-8 DFE0A080 strict {} 0 {} {Invalid trail byte is start of valid sequence} - - utf-8 E0 tcl8 \u00E0 -1 {} {Missing trail byte} - utf-8 E0 replace \uFFFD -1 {} {Missing trail byte} - utf-8 E0 strict {} 0 {} {Missing trail byte} - utf-8 E080 tcl8 \u00E0\u20AC -1 {} {First trail byte must be A0:BF} - utf-8 E080 replace \uFFFD\uFFFD -1 {} {First trail byte must be A0:BF} - utf-8 E080 strict {} 0 {} {First trail byte must be A0:BF} - utf-8 E0819C tcl8 \u00E0\u0081\u0153 -1 {} {websec.github.io - reverse solidus} - utf-8 E0819C replace \uFFFD\uFFFD\uFFFD -1 {} {websec.github.io - reverse solidus} - utf-8 E0819C strict {} 0 {} {websec.github.io - reverse solidus} - utf-8 E09F tcl8 \u00E0\u0178 -1 {} {First trail byte must be A0:BF} - utf-8 E09F replace \uFFFD\uFFFD -1 {} {First trail byte must be A0:BF} - utf-8 E09F strict {} 0 {} {First trail byte must be A0:BF} - utf-8 E0A0 tcl8 \u00E0\u00A0 -1 {} {Missing second trail byte} - utf-8 E0A0 replace \uFFFD -1 {knownW3C} {Missing second trail byte} - utf-8 E0A0 strict {} 0 {} {Missing second trail byte} - utf-8 E0BF tcl8 \u00E0\u00BF -1 {} {Missing second trail byte} - utf-8 E0BF replace \uFFFD -1 {knownW3C} {Missing second trail byte} - utf-8 E0BF strict {} 0 {} {Missing second trail byte} - utf-8 E0A07F tcl8 \u00E0\u00A0\x7F -1 {} {Second trail byte must be 80:BF} - utf-8 E0A07F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} - utf-8 E0A07F strict {} 0 {} {Second trail byte must be 80:BF} - utf-8 E0BF7F tcl8 \u00E0\u00BF\x7F -1 {} {Second trail byte must be 80:BF} - utf-8 E0BF7F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} - utf-8 E0BF7F strict {} 0 {} {Second trail byte must be 80:BF} - - utf-8 E1 tcl8 \u00E1 -1 {} {Missing trail byte} - utf-8 E1 replace \uFFFD -1 {} {Missing trail byte} - utf-8 E1 strict {} 0 {} {Missing trail byte} - utf-8 E17F tcl8 \u00E1\x7F -1 {} {Trail byte must be 80:BF} - utf-8 E17F replace \uFFFD\x7F -1 {} {Trail byte must be 80:BF} - utf-8 E17F strict {} 0 {} {Trail byte must be 80:BF} - utf-8 E181 tcl8 \u00E1\u0081 -1 {} {Missing second trail byte} - utf-8 E181 replace \uFFFD -1 {knownW3C} {Missing second trail byte} - utf-8 E181 strict {} 0 {} {Missing second trail byte} - utf-8 E1BF tcl8 \u00E1\u00BF -1 {} {Missing second trail byte} - utf-8 E1BF replace \uFFFD -1 {knownW3C} {Missing second trail byte} - utf-8 E1BF strict {} 0 {} {Missing second trail byte} - utf-8 E1807F tcl8 \u00E1\u20AC\x7F -1 {} {Second trail byte must be 80:BF} - utf-8 E1807F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} - utf-8 E1807F strict {} 0 {} {Second trail byte must be 80:BF} - utf-8 E1BF7F tcl8 \u00E1\u00BF\x7F -1 {} {Second trail byte must be 80:BF} - utf-8 E1BF7F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} - utf-8 E1BF7F strict {} 0 {} {Second trail byte must be 80:BF} - utf-8 EC tcl8 \u00EC -1 {} {Missing trail byte} - utf-8 EC replace \uFFFD -1 {} {Missing trail byte} - utf-8 EC strict {} 0 {} {Missing trail byte} - utf-8 EC7F tcl8 \u00EC\x7F -1 {} {Trail byte must be 80:BF} - utf-8 EC7F replace \uFFFD\x7F -1 {} {Trail byte must be 80:BF} - utf-8 EC7F strict {} 0 {} {Trail byte must be 80:BF} - utf-8 EC81 tcl8 \u00EC\u0081 -1 {} {Missing second trail byte} - utf-8 EC81 replace \uFFFD -1 {knownW3C} {Missing second trail byte} - utf-8 EC81 strict {} 0 {} {Missing second trail byte} - utf-8 ECBF tcl8 \u00EC\u00BF -1 {} {Missing second trail byte} - utf-8 ECBF replace \uFFFD -1 {knownW3C} {Missing second trail byte} - utf-8 ECBF strict {} 0 {} {Missing second trail byte} - utf-8 EC807F tcl8 \u00EC\u20AC\x7F -1 {} {Second trail byte must be 80:BF} - utf-8 EC807F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} - utf-8 EC807F strict {} 0 {} {Second trail byte must be 80:BF} - utf-8 ECBF7F tcl8 \u00EC\u00BF\x7F -1 {} {Second trail byte must be 80:BF} - utf-8 ECBF7F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} - utf-8 ECBF7F strict {} 0 {} {Second trail byte must be 80:BF} - - utf-8 ED tcl8 \u00ED -1 {} {Missing trail byte} - utf-8 ED replace \uFFFD -1 {} {Missing trail byte} - utf-8 ED strict {} 0 {} {Missing trail byte} - utf-8 ED7F tcl8 \u00ED\u7F -1 {} {First trail byte must be 80:9F} - utf-8 ED7F replace \uFFFD\u7F -1 {} {First trail byte must be 80:9F} - utf-8 ED7F strict {} 0 {} {First trail byte must be 80:9F} - utf-8 EDA0 tcl8 \u00ED\u00A0 -1 {} {First trail byte must be 80:9F} - utf-8 EDA0 replace \uFFFD\uFFFD -1 {} {First trail byte must be 80:9F} - utf-8 EDA0 strict {} 0 {} {First trail byte must be 80:9F} - utf-8 ED81 tcl8 \u00ED\u0081 -1 {} {Missing second trail byte} - utf-8 ED81 replace \uFFFD -1 {knownW3C} {Missing second trail byte} - utf-8 ED81 strict {} 0 {} {Missing second trail byte} - utf-8 EDBF tcl8 \u00ED\u00BF -1 {} {Missing second trail byte} - utf-8 EDBF replace \uFFFD -1 {knownW3C} {Missing second trail byte} - utf-8 EDBF strict {} 0 {} {Missing second trail byte} - utf-8 ED807F tcl8 \u00ED\u20AC\x7F -1 {} {Second trail byte must be 80:BF} - utf-8 ED807F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} - utf-8 ED807F strict {} 0 {} {Second trail byte must be 80:BF} - utf-8 ED9F7F tcl8 \u00ED\u0178\x7F -1 {} {Second trail byte must be 80:BF} - utf-8 ED9F7F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} - utf-8 ED9F7F strict {} 0 {} {Second trail byte must be 80:BF} - utf-8 EDA080 tcl8 \uD800 -1 {} {High surrogate} - utf-8 EDA080 replace \uFFFD -1 {} {High surrogate} - utf-8 EDA080 strict {} 0 {} {High surrogate} - utf-8 EDAFBF tcl8 \uDBFF -1 {} {High surrogate} - utf-8 EDAFBF replace \uFFFD -1 {} {High surrogate} - utf-8 EDAFBF strict {} 0 {} {High surrogate} - utf-8 EDB080 tcl8 \uDC00 -1 {} {Low surrogate} - utf-8 EDB080 replace \uFFFD -1 {} {Low surrogate} - utf-8 EDB080 strict {} 0 {} {Low surrogate} - utf-8 EDBFBF tcl8 \uDFFF -1 {} {Low surrogate} - utf-8 EDBFBF replace \uFFFD -1 {} {Low surrogate} - utf-8 EDBFBF strict {} 0 {} {Low surrogate} - utf-8 EDA080EDB080 tcl8 \U00010000 -1 {} {High low surrogate pair} - utf-8 EDA080EDB080 replace \uFFFD\uFFFD -1 {} {High low surrogate pair} - utf-8 EDA080EDB080 strict {} 0 {} {High low surrogate pair} - utf-8 EDAFBFEDBFBF tcl8 \U0010FFFF -1 {} {High low surrogate pair} - utf-8 EDAFBFEDBFBF replace \uFFFD\uFFFD -1 {} {High low surrogate pair} - utf-8 EDAFBFEDBFBF strict {} 0 {} {High low surrogate pair} - - utf-8 EE tcl8 \u00EE -1 {} {Missing trail byte} - utf-8 EE replace \uFFFD -1 {} {Missing trail byte} - utf-8 EE strict {} 0 {} {Missing trail byte} - utf-8 EE7F tcl8 \u00EE\u7F -1 {} {First trail byte must be 80:BF} - utf-8 EE7F replace \uFFFD\u7F -1 {} {First trail byte must be 80:BF} - utf-8 EE7F strict {} 0 {} {First trail byte must be 80:BF} - utf-8 EED0 tcl8 \u00EE\u00D0 -1 {} {First trail byte must be 80:BF} - utf-8 EED0 replace \uFFFD\uFFFD -1 {} {First trail byte must be 80:BF} - utf-8 EED0 strict {} 0 {} {First trail byte must be 80:BF} - utf-8 EE81 tcl8 \u00EE\u0081 -1 {} {Missing second trail byte} - utf-8 EE81 replace \uFFFD -1 {knownW3C} {Missing second trail byte} - utf-8 EE81 strict {} 0 {} {Missing second trail byte} - utf-8 EEBF tcl8 \u00EE\u00BF -1 {} {Missing second trail byte} - utf-8 EEBF replace \uFFFD -1 {knownW3C} {Missing second trail byte} - utf-8 EEBF strict {} 0 {} {Missing second trail byte} - utf-8 EE807F tcl8 \u00EE\u20AC\x7F -1 {} {Second trail byte must be 80:BF} - utf-8 EE807F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} - utf-8 EE807F strict {} 0 {} {Second trail byte must be 80:BF} - utf-8 EEBF7F tcl8 \u00EE\u00BF\x7F -1 {} {Second trail byte must be 80:BF} - utf-8 EEBF7F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} - utf-8 EEBF7F strict {} 0 {} {Second trail byte must be 80:BF} - utf-8 EF tcl8 \u00EF -1 {} {Missing trail byte} - utf-8 EF replace \uFFFD -1 {} {Missing trail byte} - utf-8 EF strict {} 0 {} {Missing trail byte} - utf-8 EF7F tcl8 \u00EF\u7F -1 {} {First trail byte must be 80:BF} - utf-8 EF7F replace \uFFFD\u7F -1 {} {First trail byte must be 80:BF} - utf-8 EF7F strict {} 0 {} {First trail byte must be 80:BF} - utf-8 EFD0 tcl8 \u00EF\u00D0 -1 {} {First trail byte must be 80:BF} - utf-8 EFD0 replace \uFFFD\uFFFD -1 {} {First trail byte must be 80:BF} - utf-8 EFD0 strict {} 0 {} {First trail byte must be 80:BF} - utf-8 EF81 tcl8 \u00EF\u0081 -1 {} {Missing second trail byte} - utf-8 EF81 replace \uFFFD -1 {knownW3C} {Missing second trail byte} - utf-8 EF81 strict {} 0 {} {Missing second trail byte} - utf-8 EFBF tcl8 \u00EF\u00BF -1 {} {Missing second trail byte} - utf-8 EFBF replace \uFFFD -1 {knownW3C} {Missing second trail byte} - utf-8 EFBF strict {} 0 {} {Missing second trail byte} - utf-8 EF807F tcl8 \u00EF\u20AC\x7F -1 {} {Second trail byte must be 80:BF} - utf-8 EF807F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} - utf-8 EF807F strict {} 0 {} {Second trail byte must be 80:BF} - utf-8 EFBF7F tcl8 \u00EF\u00BF\x7F -1 {} {Second trail byte must be 80:BF} - utf-8 EFBF7F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} - utf-8 EFBF7F strict {} 0 {} {Second trail byte must be 80:BF} - - utf-8 F0 tcl8 \u00F0 -1 {} {Missing trail byte} - utf-8 F0 replace \uFFFD -1 {} {Missing trail byte} - utf-8 F0 strict {} 0 {} {Missing trail byte} - utf-8 F080 tcl8 \u00F0\u20AC -1 {} {First trail byte must be 90:BF} - utf-8 F080 replace \uFFFD -1 {knownW3C} {First trail byte must be 90:BF} - utf-8 F080 strict {} 0 {} {First trail byte must be 90:BF} - utf-8 F08F tcl8 \u00F0\u8F -1 {} {First trail byte must be 90:BF} - utf-8 F08F replace \uFFFD -1 {knownW3C} {First trail byte must be 90:BF} - utf-8 F08F strict {} 0 {} {First trail byte must be 90:BF} - utf-8 F0D0 tcl8 \u00F0\u00D0 -1 {} {First trail byte must be 90:BF} - utf-8 F0D0 replace \uFFFD\uFFFD -1 {} {First trail byte must be 90:BF} - utf-8 F0D0 strict {} 0 {} {First trail byte must be 90:BF} - utf-8 F090 tcl8 \u00F0\u0090 -1 {} {Missing second trail byte} - utf-8 F090 replace \uFFFD -1 {knownW3C} {Missing second trail byte} - utf-8 F090 strict {} 0 {} {Missing second trail byte} - utf-8 F0BF tcl8 \u00F0\u00BF -1 {} {Missing second trail byte} - utf-8 F0BF replace \uFFFD -1 {knownW3C} {Missing second trail byte} - utf-8 F0BF strict {} 0 {} {Missing second trail byte} - utf-8 F0907F tcl8 \u00F0\u0090\x7F -1 {} {Second trail byte must be 80:BF} - utf-8 F0907F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} - utf-8 F0907F strict {} 0 {} {Second trail byte must be 80:BF} - utf-8 F0BF7F tcl8 \u00F0\u00BF\x7F -1 {} {Second trail byte must be 80:BF} - utf-8 F0BF7F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} - utf-8 F0BF7F strict {} 0 {} {Second trail byte must be 80:BF} - utf-8 F090BF tcl8 \u00F0\u0090\u00BF -1 {} {Missing third trail byte} - utf-8 F090BF replace \uFFFD -1 {knownW3C} {Missing third trail byte} - utf-8 F090BF strict {} 0 {} {Missing third trail byte} - utf-8 F0BF81 tcl8 \u00F0\u00BF\u0081 -1 {} {Missing third trail byte} - utf-8 F0BF81 replace \uFFFD -1 {knownW3C} {Missing third trail byte} - utf-8 F0BF81 strict {} 0 {} {Missing third trail byte} - utf-8 F0BF807F tcl8 \u00F0\u00BF\u20AC\x7F -1 {} {Third trail byte must be 80:BF} - utf-8 F0BF817F replace \uFFFD\x7F -1 {knownW3C} {Third trail byte must be 80:BF} - utf-8 F0BF817F strict {} 0 {} {Third trail byte must be 80:BF} - utf-8 F090BFD0 tcl8 \u00F0\u0090\u00BF\u00D0 -1 {} {Third trail byte must be 80:BF} - utf-8 F090BFD0 replace \uFFFD -1 {knownW3C} {Third trail byte must be 80:BF} - utf-8 F090BFD0 strict {} 0 {} {Third trail byte must be 80:BF} - - utf-8 F1 tcl8 \u00F1 -1 {} {Missing trail byte} - utf-8 F1 replace \uFFFD -1 {} {Missing trail byte} - utf-8 F1 strict {} 0 {} {Missing trail byte} - utf-8 F17F tcl8 \u00F1\u7F -1 {} {First trail byte must be 80:BF} - utf-8 F17F replace \uFFFD -1 {knownW3C} {First trail byte must be 80:BF} - utf-8 F17F strict {} 0 {} {First trail byte must be 80:BF} - utf-8 F1D0 tcl8 \u00F1\u00D0 -1 {} {First trail byte must be 80:BF} - utf-8 F1D0 replace \uFFFD\uFFFD -1 {} {First trail byte must be 80:BF} - utf-8 F1D0 strict {} 0 {} {First trail byte must be 80:BF} - utf-8 F180 tcl8 \u00F1\u20AC -1 {} {Missing second trail byte} - utf-8 F180 replace \uFFFD -1 {knownW3C} {Missing second trail byte} - utf-8 F180 strict {} 0 {} {Missing second trail byte} - utf-8 F1BF tcl8 \u00F1\u00BF -1 {} {Missing second trail byte} - utf-8 F1BF replace \uFFFD -1 {knownW3C} {Missing second trail byte} - utf-8 F1BF strict {} 0 {} {Missing second trail byte} - utf-8 F1807F tcl8 \u00F1\u20AC\x7F -1 {} {Second trail byte must be 80:BF} - utf-8 F1807F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} - utf-8 F1807F strict {} 0 {} {Second trail byte must be 80:BF} - utf-8 F1BF7F tcl8 \u00F1\u00BF\x7F -1 {} {Second trail byte must be 80:BF} - utf-8 F1BF7F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} - utf-8 F1BF7F strict {} 0 {} {Second trail byte must be 80:BF} - utf-8 F180BF tcl8 \u00F1\u20AC\u00BF -1 {} {Missing third trail byte} - utf-8 F180BF replace \uFFFD -1 {knownW3C} {Missing third trail byte} - utf-8 F180BF strict {} 0 {} {Missing third trail byte} - utf-8 F1BF81 tcl8 \u00F1\u00BF\u0081 -1 {} {Missing third trail byte} - utf-8 F1BF81 replace \uFFFD -1 {knownW3C} {Missing third trail byte} - utf-8 F1BF81 strict {} 0 {} {Missing third trail byte} - utf-8 F1BF807F tcl8 \u00F1\u00BF\u20AC\x7F -1 {} {Third trail byte must be 80:BF} - utf-8 F1BF817F replace \uFFFD\x7F -1 {knownW3C} {Third trail byte must be 80:BF} - utf-8 F1BF817F strict {} 0 {} {Third trail byte must be 80:BF} - utf-8 F180BFD0 tcl8 \u00F1\u20AC\u00BF\u00D0 -1 {} {Third trail byte must be 80:BF} - utf-8 F180BFD0 replace \uFFFD -1 {knownW3C} {Third trail byte must be 80:BF} - utf-8 F180BFD0 strict {} 0 {} {Third trail byte must be 80:BF} - utf-8 F3 tcl8 \u00F3 -1 {} {Missing trail byte} - utf-8 F3 replace \uFFFD -1 {} {Missing trail byte} - utf-8 F3 strict {} 0 {} {Missing trail byte} - utf-8 F37F tcl8 \u00F3\x7F -1 {} {First trail byte must be 80:BF} - utf-8 F37F replace \uFFFD -1 {knownW3C} {First trail byte must be 80:BF} - utf-8 F37F strict {} 0 {} {First trail byte must be 80:BF} - utf-8 F3D0 tcl8 \u00F3\u00D0 -1 {} {First trail byte must be 80:BF} - utf-8 F3D0 replace \uFFFD\uFFFD -1 {} {First trail byte must be 80:BF} - utf-8 F3D0 strict {} 0 {} {First trail byte must be 80:BF} - utf-8 F380 tcl8 \u00F3\u20AC -1 {} {Missing second trail byte} - utf-8 F380 replace \uFFFD -1 {knownW3C} {Missing second trail byte} - utf-8 F380 strict {} 0 {} {Missing second trail byte} - utf-8 F3BF tcl8 \u00F3\u00BF -1 {} {Missing second trail byte} - utf-8 F3BF replace \uFFFD -1 {knownW3C} {Missing second trail byte} - utf-8 F3BF strict {} 0 {} {Missing second trail byte} - utf-8 F3807F tcl8 \u00F3\u20AC\x7F -1 {} {Second trail byte must be 80:BF} - utf-8 F3807F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} - utf-8 F3807F strict {} 0 {} {Second trail byte must be 80:BF} - utf-8 F3BF7F tcl8 \u00F3\u00BF\x7F -1 {} {Second trail byte must be 80:BF} - utf-8 F3BF7F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} - utf-8 F3BF7F strict {} 0 {} {Second trail byte must be 80:BF} - utf-8 F380BF tcl8 \u00F3\u20AC\u00BF -1 {} {Missing third trail byte} - utf-8 F380BF replace \uFFFD -1 {knownW3C} {Missing third trail byte} - utf-8 F380BF strict {} 0 {} {Missing third trail byte} - utf-8 F3BF81 tcl8 \u00F3\u00BF\u0081 -1 {} {Missing third trail byte} - utf-8 F3BF81 replace \uFFFD -1 {knownW3C} {Missing third trail byte} - utf-8 F3BF81 strict {} 0 {} {Missing third trail byte} - utf-8 F3BF807F tcl8 \u00F3\u00BF\u20AC\x7F -1 {} {Third trail byte must be 80:BF} - utf-8 F3BF817F replace \uFFFD\x7F -1 {knownW3C} {Third trail byte must be 80:BF} - utf-8 F3BF817F strict {} 0 {} {Third trail byte must be 80:BF} - utf-8 F380BFD0 tcl8 \u00F3\u20AC\u00BF\u00D0 -1 {} {Third trail byte must be 80:BF} - utf-8 F380BFD0 replace \uFFFD -1 {knownW3C} {Third trail byte must be 80:BF} - utf-8 F380BFD0 strict {} 0 {} {Third trail byte must be 80:BF} - - utf-8 F4 tcl8 \u00F4 -1 {} {Missing trail byte} - utf-8 F4 replace \uFFFD -1 {} {Missing trail byte} - utf-8 F4 strict {} 0 {} {Missing trail byte} - utf-8 F47F tcl8 \u00F4\u7F -1 {} {First trail byte must be 80:8F} - utf-8 F47F replace \uFFFD\u7F -1 {knownW3C} {First trail byte must be 80:8F} - utf-8 F47F strict {} 0 {} {First trail byte must be 80:8F} - utf-8 F490 tcl8 \u00F4\u0090 -1 {} {First trail byte must be 80:8F} - utf-8 F490 replace \uFFFD\uFFFD -1 {} {First trail byte must be 80:8F} - utf-8 F490 strict {} 0 {} {First trail byte must be 80:8F} - utf-8 F480 tcl8 \u00F4\u20AC -1 {} {Missing second trail byte} - utf-8 F480 replace \uFFFD -1 {knownW3C} {Missing second trail byte} - utf-8 F480 strict {} 0 {} {Missing second trail byte} - utf-8 F48F tcl8 \u00F4\u008F -1 {} {Missing second trail byte} - utf-8 F48F replace \uFFFD -1 {knownW3C} {Missing second trail byte} - utf-8 F48F strict {} 0 {} {Missing second trail byte} - utf-8 F4807F tcl8 \u00F4\u20AC\x7F -1 {} {Second trail byte must be 80:BF} - utf-8 F4807F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} - utf-8 F4807F strict {} 0 {} {Second trail byte must be 80:BF} - utf-8 F48F7F tcl8 \u00F4\u008F\x7F -1 {} {Second trail byte must be 80:BF} - utf-8 F48F7F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} - utf-8 F48F7F strict {} 0 {} {Second trail byte must be 80:BF} - utf-8 F48081 tcl8 \u00F4\u20AC\u0081 -1 {} {Missing third trail byte} - utf-8 F48081 replace \uFFFD -1 {knownW3C} {Missing third trail byte} - utf-8 F48081 strict {} 0 {} {Missing third trail byte} - utf-8 F48F81 tcl8 \u00F4\u008F\u0081 -1 {} {Missing third trail byte} - utf-8 F48F81 replace \uFFFD -1 {knownW3C} {Missing third trail byte} - utf-8 F48F81 strict {} 0 {} {Missing third trail byte} - utf-8 F481817F tcl8 \u00F4\u0081\u0081\x7F -1 {} {Third trail byte must be 80:BF} - utf-8 F480817F replace \uFFFD\x7F -1 {knownW3C} {Third trail byte must be 80:BF} - utf-8 F480817F strict {} 0 {} {Third trail byte must be 80:BF} - utf-8 F48FBFD0 tcl8 \u00F4\u008F\u00BF\u00D0 -1 {} {Third trail byte must be 80:BF} - utf-8 F48FBFD0 replace \uFFFD -1 {knownW3C} {Third trail byte must be 80:BF} - utf-8 F48FBFD0 strict {} 0 {} {Third trail byte must be 80:BF} - - utf-8 F5 tcl8 \u00F5 -1 {} {F5:FF are invalid everywhere} - utf-8 F5 replace \uFFFD -1 {} {F5:FF are invalid everywhere} - utf-8 F5 strict {} 0 {} {F5:FF are invalid everywhere} - utf-8 FF tcl8 \u00FF -1 {} {F5:FF are invalid everywhere} - utf-8 FF replace \uFFFD -1 {} {F5:FF are invalid everywhere} - utf-8 FF strict {} 0 {} {F5:FF are invalid everywhere} - - utf-8 C0AFE080BFF0818130 replace \uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\x30 -1 {} {Unicode Table 3-8} - utf-8 EDA080EDBFBFEDAF30 replace \uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\x30 -1 {knownW3C} {Unicode Table 3-9} - utf-8 F4919293FF4180BF30 replace \uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\u0041\uFFFD\uFFFD\x30 -1 {} {Unicode Table 3-10} - utf-8 E180E2F09192F1BF30 replace \uFFFD\uFFFD\uFFFD\uFFFD\x30 -1 {knownW3C} {Unicode Table 3.11} -} - -# utf16-le and utf16-be test cases. Note utf16 cases are automatically generated -# based on these depending on platform endianness. Note truncated tests can only -# happen when the sequence is at the end (including by itself) Thus {solo tail} -# in some cases. -lappend encInvalidBytes {*}{ - utf-16le 41 tcl8 \uFFFD -1 {solo tail} {Truncated} - utf-16le 41 replace \uFFFD -1 {solo tail} {Truncated} - utf-16le 41 strict {} 0 {solo tail} {Truncated} - utf-16le 00D8 tcl8 \uD800 -1 {} {Missing low surrogate} - utf-16le 00D8 replace \uFFFD -1 {knownBug} {Missing low surrogate} - utf-16le 00D8 strict {} 0 {knownBug} {Missing low surrogate} - utf-16le 00DC tcl8 \uDC00 -1 {} {Missing high surrogate} - utf-16le 00DC replace \uFFFD -1 {knownBug} {Missing high surrogate} - utf-16le 00DC strict {} 0 {knownBug} {Missing high surrogate} - - utf-16be 41 tcl8 \uFFFD -1 {solo tail} {Truncated} - utf-16be 41 replace \uFFFD -1 {solo tail} {Truncated} - utf-16be 41 strict {} 0 {solo tail} {Truncated} - utf-16be D800 tcl8 \uD800 -1 {} {Missing low surrogate} - utf-16be D800 replace \uFFFD -1 {knownBug} {Missing low surrogate} - utf-16be D800 strict {} 0 {knownBug} {Missing low surrogate} - utf-16be DC00 tcl8 \uDC00 -1 {} {Missing high surrogate} - utf-16be DC00 replace \uFFFD -1 {knownBug} {Missing high surrogate} - utf-16be DC00 strict {} 0 {knownBug} {Missing high surrogate} -} - -# utf32-le and utf32-be test cases. Note utf32 cases are automatically generated -# based on these depending on platform endianness. Note truncated tests can only -# happen when the sequence is at the end (including by itself) Thus {solo tail} -# in some cases. -lappend encInvalidBytes {*}{ - utf-32le 41 tcl8 \uFFFD -1 {solo tail} {Truncated} - utf-32le 41 replace \uFFFD -1 {solo} {Truncated} - utf-32le 41 strict {} 0 {solo tail} {Truncated} - utf-32le 4100 tcl8 \uFFFD -1 {solo tail} {Truncated} - utf-32le 4100 replace \uFFFD -1 {solo} {Truncated} - utf-32le 4100 strict {} 0 {solo tail} {Truncated} - utf-32le 410000 tcl8 \uFFFD -1 {solo tail} {Truncated} - utf-32le 410000 replace \uFFFD -1 {solo} {Truncated} - utf-32le 410000 strict {} 0 {solo tail} {Truncated} - utf-32le 00D80000 tcl8 \uD800 -1 {} {High-surrogate} - utf-32le 00D80000 replace \uFFFD -1 {} {High-surrogate} - utf-32le 00D80000 strict {} 0 {} {High-surrogate} - utf-32le 00DC0000 tcl8 \uDC00 -1 {} {Low-surrogate} - utf-32le 00DC0000 replace \uFFFD -1 {} {Low-surrogate} - utf-32le 00DC0000 strict {} 0 {} {Low-surrogate} - utf-32le 00D8000000DC0000 tcl8 \uD800\uDC00 -1 {} {High-low-surrogate-pair} - utf-32le 00D8000000DC0000 replace \uFFFD\uFFFD -1 {} {High-low-surrogate-pair} - utf-32le 00D8000000DC0000 strict {} 0 {} {High-low-surrogate-pair} - utf-32le 00001100 tcl8 \UFFFD -1 {} {Out of range} - utf-32le 00001100 replace \UFFFD -1 {} {Out of range} - utf-32le 00001100 strict {} 0 {} {Out of range} - utf-32le FFFFFFFF tcl8 \UFFFD -1 {} {Out of range} - utf-32le FFFFFFFF replace \UFFFD -1 {} {Out of range} - utf-32le FFFFFFFF strict {} 0 {} {Out of range} - - utf-32be 41 tcl8 \uFFFD -1 {solo tail} {Truncated} - utf-32be 41 replace \uFFFD -1 {solo tail} {Truncated} - utf-32be 41 strict {} 0 {solo tail} {Truncated} - utf-32be 0041 tcl8 \uFFFD -1 {solo tail} {Truncated} - utf-32be 0041 replace \uFFFD -1 {solo} {Truncated} - utf-32be 0041 strict {} 0 {solo tail} {Truncated} - utf-32be 000041 tcl8 \uFFFD -1 {solo tail} {Truncated} - utf-32be 000041 replace \uFFFD -1 {solo} {Truncated} - utf-32be 000041 strict {} 0 {solo tail} {Truncated} - utf-32be 0000D800 tcl8 \uD800 -1 {} {High-surrogate} - utf-32be 0000D800 replace \uFFFD -1 {} {High-surrogate} - utf-32be 0000D800 strict {} 0 {} {High-surrogate} - utf-32be 0000DC00 tcl8 \uDC00 -1 {} {Low-surrogate} - utf-32be 0000DC00 replace \uFFFD -1 {} {Low-surrogate} - utf-32be 0000DC00 strict {} 0 {} {Low-surrogate} - utf-32be 0000D8000000DC00 tcl8 \uD800\uDC00 -1 {} {High-low-surrogate-pair} - utf-32be 0000D8000000DC00 replace \uFFFD\uFFFD -1 {} {High-low-surrogate-pair} - utf-32be 0000D8000000DC00 strict {} 0 {} {High-low-surrogate-pair} - utf-32be 00110000 tcl8 \UFFFD -1 {} {Out of range} - utf-32be 00110000 replace \UFFFD -1 {} {Out of range} - utf-32be 00110000 strict {} 0 {} {Out of range} - utf-32be FFFFFFFF tcl8 \UFFFD -1 {} {Out of range} - utf-32be FFFFFFFF replace \UFFFD -1 {} {Out of range} - utf-32be FFFFFFFF strict {} 0 {} {Out of range} -} - - -# Strings that cannot be encoded for specific encoding / profiles -# {encoding string profile exptedresult expectedfailindex ctrl comment} -# should be unique for test ids to be unique. -# Note utf-16, utf-32 missing because they are automatically -# generated based on le/be versions. -# Each entry potentially results in generation of multiple tests. -# This is controlled by the ctrl field. This should be a list of -# zero or more of the following: -# solo - the test data is the string itself -# lead - the test data is the string followed by a valid suffix -# tail - the test data is the string preceded by a prefix -# middle - the test data is the string wrapped by a prefix and suffix -# If the ctrl field is empty it is treated as all of the above -# Note if there is any other value by itself, it will cause the test to -# be skipped. This is intentional to skip known bugs. -# TODO - other encodings -# TODO - out of range code point (note cannot be generated by \U notation) -lappend encUnencodableStrings {*}{ - ascii \u00e0 tcl8 3f -1 {} {unencodable} - ascii \u00e0 strict {} 0 {} {unencodable} - - iso8859-1 \u0141 tcl8 3f -1 {} unencodable - iso8859-1 \u0141 strict {} 0 {} unencodable - - utf-8 \uD800 tcl8 eda080 -1 {} High-surrogate - utf-8 \uD800 strict {} 0 {} High-surrogate - utf-8 \uDC00 tcl8 edb080 -1 {} High-surrogate - utf-8 \uDC00 strict {} 0 {} High-surrogate -} +source [file join [file dirname [info script]] encodingVectors.tcl] -# Generated tests comparing against ICU -# TODO - commented out for now as generating a lot of mismatches. -# source [file join [file dirname [info script]] icuUcmTests.tcl] # Maps utf-{16,32}{le,be} to utf-16, utf-32 and # others to "". Used to test utf-16, utf-32 based diff --git a/tests/encodingVectors.tcl b/tests/encodingVectors.tcl new file mode 100644 index 0000000..986e221 --- /dev/null +++ b/tests/encodingVectors.tcl @@ -0,0 +1,655 @@ +# This file contains test vectors for verifying various encodings. They are +# stored in a common file so that they can be sourced into the various test +# modules that are dependent on encodings. This file contains statically defined +# test vectors. In addition, it sources the ICU-generated test vectors from +# icuUcmTests.tcl. +# +# Note that sourcing the file will reinitialize any existing encoding test +# vectors. +# + +# List of defined encoding profiles +set encProfiles {tcl8 strict replace} +set encDefaultProfile tcl8; # Should reflect the default from implementation + +# encValidStrings - Table of valid strings. +# +# Each row is +# The pair should be unique for generated test ids to be unique. +# STR is a string that can be encoded in the encoding ENCODING resulting +# in the byte sequence BYTES. The CTRL field is a list that controls test +# generation. It may contain zero or more of `solo`, `lead`, `tail` and +# `middle` indicating that the generated tests should include the string +# by itself, as the lead of a longer string, as the tail of a longer string +# and in the middle of a longer string. If CTRL is empty, it is treated as +# containing all four of the above. The CTRL field may also contain the +# words knownBug or knownW3C which will cause the test generation for that +# vector to be skipped. +# +# utf-16, utf-32 missing because they are automatically +# generated based on le/be versions. +set encValidStrings {}; # Reset the table + +lappend encValidStrings {*}{ + ascii \u0000 00 {} {Lowest ASCII} + ascii \u007F 7F knownBug {Highest ASCII} + ascii \u007D 7D {} {Brace - just to verify test scripts are escaped correctly} + ascii \u007B 7B {} {Terminating brace - just to verify test scripts are escaped correctly} + + utf-8 \u0000 00 {} {Unicode Table 3.7 Row 1} + utf-8 \u007F 7F {} {Unicode Table 3.7 Row 1} + utf-8 \u0080 C280 {} {Unicode Table 3.7 Row 2} + utf-8 \u07FF DFBF {} {Unicode Table 3.7 Row 2} + utf-8 \u0800 E0A080 {} {Unicode Table 3.7 Row 3} + utf-8 \u0FFF E0BFBF {} {Unicode Table 3.7 Row 3} + utf-8 \u1000 E18080 {} {Unicode Table 3.7 Row 4} + utf-8 \uCFFF ECBFBF {} {Unicode Table 3.7 Row 4} + utf-8 \uD000 ED8080 {} {Unicode Table 3.7 Row 5} + utf-8 \uD7FF ED9FBF {} {Unicode Table 3.7 Row 5} + utf-8 \uE000 EE8080 {} {Unicode Table 3.7 Row 6} + utf-8 \uFFFF EFBFBF {} {Unicode Table 3.7 Row 6} + utf-8 \U10000 F0908080 {} {Unicode Table 3.7 Row 7} + utf-8 \U3FFFF F0BFBFBF {} {Unicode Table 3.7 Row 7} + utf-8 \U40000 F1808080 {} {Unicode Table 3.7 Row 8} + utf-8 \UFFFFF F3BFBFBF {} {Unicode Table 3.7 Row 8} + utf-8 \U100000 F4808080 {} {Unicode Table 3.7 Row 9} + utf-8 \U10FFFF F48FBFBF {} {Unicode Table 3.7 Row 9} + utf-8 A\u03A9\u8A9E\U00010384 41CEA9E8AA9EF0908E84 {} {Unicode 2.5} + + utf-16le \u0000 0000 {} {Lowest code unit} + utf-16le \uD7FF FFD7 {} {Below high surrogate range} + utf-16le \uE000 00E0 {} {Above low surrogate range} + utf-16le \uFFFF FFFF {} {Highest code unit} + utf-16le \U010000 00D800DC {} {First surrogate pair} + utf-16le \U10FFFF FFDBFFDF {} {First surrogate pair} + utf-16le A\u03A9\u8A9E\U00010384 4100A9039E8A00D884DF {} {Unicode 2.5} + + utf-16be \u0000 0000 {} {Lowest code unit} + utf-16be \uD7FF D7FF {} {Below high surrogate range} + utf-16be \uE000 E000 {} {Above low surrogate range} + utf-16be \uFFFF FFFF {} {Highest code unit} + utf-16be \U010000 D800DC00 {} {First surrogate pair} + utf-16be \U10FFFF DBFFDFFF {} {First surrogate pair} + utf-16be A\u03A9\u8A9E\U00010384 004103A98A9ED800DF84 {} {Unicode 2.5} + + utf-32le \u0000 00000000 {} {Lowest code unit} + utf-32le \uFFFF FFFF0000 {} {Highest BMP} + utf-32le \U010000 00000100 {} {First supplementary} + utf-32le \U10FFFF ffff1000 {} {Last supplementary} + utf-32le A\u03A9\u8A9E\U00010384 41000000A90300009E8A000084030100 {} {Unicode 2.5} + + utf-32be \u0000 00000000 {} {Lowest code unit} + utf-32be \uFFFF 0000FFFF {} {Highest BMP} + utf-32be \U010000 00010000 {} {First supplementary} + utf-32be \U10FFFF 0010FFFF {} {Last supplementary} + utf-32be A\u03A9\u8A9E\U00010384 00000041000003A900008A9E00010384 {} {Unicode 2.5} +} + +# encInvalidBytes - Table of invalid byte sequences +# These are byte sequences that should appear for an encoding. Each row is +# of the form +# +# The triple should be unique for test ids to be +# unique. BYTES is a byte sequence that is invalid. EXPECTEDRESULT is the +# expected string when the bytes are decoded using the PROFILE profile. +# FAILINDEX gives the expected index of the invalid byte under that profile. The +# CTRL field is a list that controls test generation. It may contain zero or +# more of `solo`, `lead`, `tail` and `middle` indicating that the generated the +# tail of a longer and in the middle of a longer string. If empty, it is treated +# as containing all four of the above. The CTRL field may also contain the words +# knownBug or knownW3C which will cause the test generation for that vector to +# be skipped. +# +# utf-32 missing because they are automatically generated based on le/be +# versions. +set encInvalidBytes {}; # Reset the table + +# ascii - Any byte above 127 is invalid and is mapped +# to the same numeric code point except for the range +# 80-9F which is treated as cp1252. +# This tests the TableToUtfProc code path. +lappend encInvalidBytes {*}{ + ascii 80 tcl8 \u20AC -1 {knownBug} {map to cp1252} + ascii 80 replace \uFFFD -1 {} {Smallest invalid byte} + ascii 80 strict {} 0 {} {Smallest invalid byte} + ascii 81 tcl8 \u0081 -1 {knownBug} {map to cp1252} + ascii 82 tcl8 \u201A -1 {knownBug} {map to cp1252} + ascii 83 tcl8 \u0192 -1 {knownBug} {map to cp1252} + ascii 84 tcl8 \u201E -1 {knownBug} {map to cp1252} + ascii 85 tcl8 \u2026 -1 {knownBug} {map to cp1252} + ascii 86 tcl8 \u2020 -1 {knownBug} {map to cp1252} + ascii 87 tcl8 \u2021 -1 {knownBug} {map to cp1252} + ascii 88 tcl8 \u0276 -1 {knownBug} {map to cp1252} + ascii 89 tcl8 \u2030 -1 {knownBug} {map to cp1252} + ascii 8A tcl8 \u0160 -1 {knownBug} {map to cp1252} + ascii 8B tcl8 \u2039 -1 {knownBug} {map to cp1252} + ascii 8C tcl8 \u0152 -1 {knownBug} {map to cp1252} + ascii 8D tcl8 \u008D -1 {knownBug} {map to cp1252} + ascii 8E tcl8 \u017D -1 {knownBug} {map to cp1252} + ascii 8F tcl8 \u008F -1 {knownBug} {map to cp1252} + ascii 90 tcl8 \u0090 -1 {knownBug} {map to cp1252} + ascii 91 tcl8 \u2018 -1 {knownBug} {map to cp1252} + ascii 92 tcl8 \u2019 -1 {knownBug} {map to cp1252} + ascii 93 tcl8 \u201C -1 {knownBug} {map to cp1252} + ascii 94 tcl8 \u201D -1 {knownBug} {map to cp1252} + ascii 95 tcl8 \u2022 -1 {knownBug} {map to cp1252} + ascii 96 tcl8 \u2013 -1 {knownBug} {map to cp1252} + ascii 97 tcl8 \u2014 -1 {knownBug} {map to cp1252} + ascii 98 tcl8 \u02DC -1 {knownBug} {map to cp1252} + ascii 99 tcl8 \u2122 -1 {knownBug} {map to cp1252} + ascii 9A tcl8 \u0161 -1 {knownBug} {map to cp1252} + ascii 9B tcl8 \u203A -1 {knownBug} {map to cp1252} + ascii 9C tcl8 \u0153 -1 {knownBug} {map to cp1252} + ascii 9D tcl8 \u009D -1 {knownBug} {map to cp1252} + ascii 9E tcl8 \u017E -1 {knownBug} {map to cp1252} + ascii 9F tcl8 \u0178 -1 {knownBug} {map to cp1252} + + ascii FF tcl8 \u00FF -1 {} {Largest invalid byte} + ascii FF replace \uFFFD -1 {} {Largest invalid byte} + ascii FF strict {} 0 {} {Largest invalid byte} +} + +# utf-8 - valid sequences based on Table 3.7 in the Unicode +# standard. +# +# Code Points First Second Third Fourth Byte +# U+0000..U+007F 00..7F +# U+0080..U+07FF C2..DF 80..BF +# U+0800..U+0FFF E0 A0..BF 80..BF +# U+1000..U+CFFF E1..EC 80..BF 80..BF +# U+D000..U+D7FF ED 80..9F 80..BF +# U+E000..U+FFFF EE..EF 80..BF 80..BF +# U+10000..U+3FFFF F0 90..BF 80..BF 80..BF +# U+40000..U+FFFFF F1..F3 80..BF 80..BF 80..BF +# U+100000..U+10FFFF F4 80..8F 80..BF 80..BF +# +# Tests below are based on the "gaps" in the above table. Note ascii test +# values are repeated because internally a different code path is used +# (UtfToUtfProc). +# Note C0, C1, F5:FF are invalid bytes ANYWHERE. Exception is C080 +lappend encInvalidBytes {*}{ + utf-8 80 tcl8 \u20AC -1 {} {map to cp1252} + utf-8 80 replace \uFFFD -1 {} {Smallest invalid byte} + utf-8 80 strict {} 0 {} {Smallest invalid byte} + utf-8 81 tcl8 \u0081 -1 {} {map to cp1252} + utf-8 82 tcl8 \u201A -1 {} {map to cp1252} + utf-8 83 tcl8 \u0192 -1 {} {map to cp1252} + utf-8 84 tcl8 \u201E -1 {} {map to cp1252} + utf-8 85 tcl8 \u2026 -1 {} {map to cp1252} + utf-8 86 tcl8 \u2020 -1 {} {map to cp1252} + utf-8 87 tcl8 \u2021 -1 {} {map to cp1252} + utf-8 88 tcl8 \u02C6 -1 {} {map to cp1252} + utf-8 89 tcl8 \u2030 -1 {} {map to cp1252} + utf-8 8A tcl8 \u0160 -1 {} {map to cp1252} + utf-8 8B tcl8 \u2039 -1 {} {map to cp1252} + utf-8 8C tcl8 \u0152 -1 {} {map to cp1252} + utf-8 8D tcl8 \u008D -1 {} {map to cp1252} + utf-8 8E tcl8 \u017D -1 {} {map to cp1252} + utf-8 8F tcl8 \u008F -1 {} {map to cp1252} + utf-8 90 tcl8 \u0090 -1 {} {map to cp1252} + utf-8 91 tcl8 \u2018 -1 {} {map to cp1252} + utf-8 92 tcl8 \u2019 -1 {} {map to cp1252} + utf-8 93 tcl8 \u201C -1 {} {map to cp1252} + utf-8 94 tcl8 \u201D -1 {} {map to cp1252} + utf-8 95 tcl8 \u2022 -1 {} {map to cp1252} + utf-8 96 tcl8 \u2013 -1 {} {map to cp1252} + utf-8 97 tcl8 \u2014 -1 {} {map to cp1252} + utf-8 98 tcl8 \u02DC -1 {} {map to cp1252} + utf-8 99 tcl8 \u2122 -1 {} {map to cp1252} + utf-8 9A tcl8 \u0161 -1 {} {map to cp1252} + utf-8 9B tcl8 \u203A -1 {} {map to cp1252} + utf-8 9C tcl8 \u0153 -1 {} {map to cp1252} + utf-8 9D tcl8 \u009D -1 {} {map to cp1252} + utf-8 9E tcl8 \u017E -1 {} {map to cp1252} + utf-8 9F tcl8 \u0178 -1 {} {map to cp1252} + + utf-8 C0 tcl8 \u00C0 -1 {} {C0 is invalid anywhere} + utf-8 C0 strict {} 0 {} {C0 is invalid anywhere} + utf-8 C0 replace \uFFFD -1 {} {C0 is invalid anywhere} + utf-8 C080 tcl8 \u0000 -1 {} {C080 -> U+0 in Tcl's internal modified UTF8} + utf-8 C080 strict {} 0 {} {C080 -> invalid} + utf-8 C080 replace \uFFFD -1 {} {C080 -> single replacement char} + utf-8 C0A2 tcl8 \u00C0\u00A2 -1 {} {websec.github.io - A} + utf-8 C0A2 replace \uFFFD\uFFFD -1 {} {websec.github.io - A} + utf-8 C0A2 strict {} 0 {} {websec.github.io - A} + utf-8 C0A7 tcl8 \u00C0\u00A7 -1 {} {websec.github.io - double quote} + utf-8 C0A7 replace \uFFFD\uFFFD -1 {} {websec.github.io - double quote} + utf-8 C0A7 strict {} 0 {} {websec.github.io - double quote} + utf-8 C0AE tcl8 \u00C0\u00AE -1 {} {websec.github.io - full stop} + utf-8 C0AE replace \uFFFD\uFFFD -1 {} {websec.github.io - full stop} + utf-8 C0AE strict {} 0 {} {websec.github.io - full stop} + utf-8 C0AF tcl8 \u00C0\u00AF -1 {} {websec.github.io - solidus} + utf-8 C0AF replace \uFFFD\uFFFD -1 {} {websec.github.io - solidus} + utf-8 C0AF strict {} 0 {} {websec.github.io - solidus} + + utf-8 C1 tcl8 \u00C1 -1 {} {C1 is invalid everywhere} + utf-8 C1 replace \uFFFD -1 {} {C1 is invalid everywhere} + utf-8 C1 strict {} 0 {} {C1 is invalid everywhere} + utf-8 C181 tcl8 \u00C1\u0081 -1 {} {websec.github.io - base test (A)} + utf-8 C181 replace \uFFFD\uFFFD -1 {} {websec.github.io - base test (A)} + utf-8 C181 strict {} 0 {} {websec.github.io - base test (A)} + utf-8 C19C tcl8 \u00C1\u0153 -1 {} {websec.github.io - reverse solidus} + utf-8 C19C replace \uFFFD\uFFFD -1 {} {websec.github.io - reverse solidus} + utf-8 C19C strict {} 0 {} {websec.github.io - reverse solidus} + + utf-8 C2 tcl8 \u00C2 -1 {} {Missing trail byte} + utf-8 C2 replace \uFFFD -1 {} {Missing trail byte} + utf-8 C2 strict {} 0 {} {Missing trail byte} + utf-8 C27F tcl8 \u00C2\x7F -1 {} {Trail byte must be 80:BF} + utf-8 C27F replace \uFFFD\x7F -1 {} {Trail byte must be 80:BF} + utf-8 C27F strict {} 0 {} {Trail byte must be 80:BF} + utf-8 DF tcl8 \u00DF -1 {} {Missing trail byte} + utf-8 DF replace \uFFFD -1 {} {Missing trail byte} + utf-8 DF strict {} 0 {} {Missing trail byte} + utf-8 DF7F tcl8 \u00DF\x7F -1 {} {Trail byte must be 80:BF} + utf-8 DF7F replace \uFFFD\x7F -1 {} {Trail byte must be 80:BF} + utf-8 DF7F strict {} 0 {} {Trail byte must be 80:BF} + utf-8 DFE0A080 tcl8 \u00DF\u0800 -1 {} {Invalid trail byte is start of valid sequence} + utf-8 DFE0A080 replace \uFFFD\u0800 -1 {} {Invalid trail byte is start of valid sequence} + utf-8 DFE0A080 strict {} 0 {} {Invalid trail byte is start of valid sequence} + + utf-8 E0 tcl8 \u00E0 -1 {} {Missing trail byte} + utf-8 E0 replace \uFFFD -1 {} {Missing trail byte} + utf-8 E0 strict {} 0 {} {Missing trail byte} + utf-8 E080 tcl8 \u00E0\u20AC -1 {} {First trail byte must be A0:BF} + utf-8 E080 replace \uFFFD\uFFFD -1 {} {First trail byte must be A0:BF} + utf-8 E080 strict {} 0 {} {First trail byte must be A0:BF} + utf-8 E0819C tcl8 \u00E0\u0081\u0153 -1 {} {websec.github.io - reverse solidus} + utf-8 E0819C replace \uFFFD\uFFFD\uFFFD -1 {} {websec.github.io - reverse solidus} + utf-8 E0819C strict {} 0 {} {websec.github.io - reverse solidus} + utf-8 E09F tcl8 \u00E0\u0178 -1 {} {First trail byte must be A0:BF} + utf-8 E09F replace \uFFFD\uFFFD -1 {} {First trail byte must be A0:BF} + utf-8 E09F strict {} 0 {} {First trail byte must be A0:BF} + utf-8 E0A0 tcl8 \u00E0\u00A0 -1 {} {Missing second trail byte} + utf-8 E0A0 replace \uFFFD -1 {knownW3C} {Missing second trail byte} + utf-8 E0A0 strict {} 0 {} {Missing second trail byte} + utf-8 E0BF tcl8 \u00E0\u00BF -1 {} {Missing second trail byte} + utf-8 E0BF replace \uFFFD -1 {knownW3C} {Missing second trail byte} + utf-8 E0BF strict {} 0 {} {Missing second trail byte} + utf-8 E0A07F tcl8 \u00E0\u00A0\x7F -1 {} {Second trail byte must be 80:BF} + utf-8 E0A07F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} + utf-8 E0A07F strict {} 0 {} {Second trail byte must be 80:BF} + utf-8 E0BF7F tcl8 \u00E0\u00BF\x7F -1 {} {Second trail byte must be 80:BF} + utf-8 E0BF7F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} + utf-8 E0BF7F strict {} 0 {} {Second trail byte must be 80:BF} + + utf-8 E1 tcl8 \u00E1 -1 {} {Missing trail byte} + utf-8 E1 replace \uFFFD -1 {} {Missing trail byte} + utf-8 E1 strict {} 0 {} {Missing trail byte} + utf-8 E17F tcl8 \u00E1\x7F -1 {} {Trail byte must be 80:BF} + utf-8 E17F replace \uFFFD\x7F -1 {} {Trail byte must be 80:BF} + utf-8 E17F strict {} 0 {} {Trail byte must be 80:BF} + utf-8 E181 tcl8 \u00E1\u0081 -1 {} {Missing second trail byte} + utf-8 E181 replace \uFFFD -1 {knownW3C} {Missing second trail byte} + utf-8 E181 strict {} 0 {} {Missing second trail byte} + utf-8 E1BF tcl8 \u00E1\u00BF -1 {} {Missing second trail byte} + utf-8 E1BF replace \uFFFD -1 {knownW3C} {Missing second trail byte} + utf-8 E1BF strict {} 0 {} {Missing second trail byte} + utf-8 E1807F tcl8 \u00E1\u20AC\x7F -1 {} {Second trail byte must be 80:BF} + utf-8 E1807F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} + utf-8 E1807F strict {} 0 {} {Second trail byte must be 80:BF} + utf-8 E1BF7F tcl8 \u00E1\u00BF\x7F -1 {} {Second trail byte must be 80:BF} + utf-8 E1BF7F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} + utf-8 E1BF7F strict {} 0 {} {Second trail byte must be 80:BF} + utf-8 EC tcl8 \u00EC -1 {} {Missing trail byte} + utf-8 EC replace \uFFFD -1 {} {Missing trail byte} + utf-8 EC strict {} 0 {} {Missing trail byte} + utf-8 EC7F tcl8 \u00EC\x7F -1 {} {Trail byte must be 80:BF} + utf-8 EC7F replace \uFFFD\x7F -1 {} {Trail byte must be 80:BF} + utf-8 EC7F strict {} 0 {} {Trail byte must be 80:BF} + utf-8 EC81 tcl8 \u00EC\u0081 -1 {} {Missing second trail byte} + utf-8 EC81 replace \uFFFD -1 {knownW3C} {Missing second trail byte} + utf-8 EC81 strict {} 0 {} {Missing second trail byte} + utf-8 ECBF tcl8 \u00EC\u00BF -1 {} {Missing second trail byte} + utf-8 ECBF replace \uFFFD -1 {knownW3C} {Missing second trail byte} + utf-8 ECBF strict {} 0 {} {Missing second trail byte} + utf-8 EC807F tcl8 \u00EC\u20AC\x7F -1 {} {Second trail byte must be 80:BF} + utf-8 EC807F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} + utf-8 EC807F strict {} 0 {} {Second trail byte must be 80:BF} + utf-8 ECBF7F tcl8 \u00EC\u00BF\x7F -1 {} {Second trail byte must be 80:BF} + utf-8 ECBF7F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} + utf-8 ECBF7F strict {} 0 {} {Second trail byte must be 80:BF} + + utf-8 ED tcl8 \u00ED -1 {} {Missing trail byte} + utf-8 ED replace \uFFFD -1 {} {Missing trail byte} + utf-8 ED strict {} 0 {} {Missing trail byte} + utf-8 ED7F tcl8 \u00ED\u7F -1 {} {First trail byte must be 80:9F} + utf-8 ED7F replace \uFFFD\u7F -1 {} {First trail byte must be 80:9F} + utf-8 ED7F strict {} 0 {} {First trail byte must be 80:9F} + utf-8 EDA0 tcl8 \u00ED\u00A0 -1 {} {First trail byte must be 80:9F} + utf-8 EDA0 replace \uFFFD\uFFFD -1 {} {First trail byte must be 80:9F} + utf-8 EDA0 strict {} 0 {} {First trail byte must be 80:9F} + utf-8 ED81 tcl8 \u00ED\u0081 -1 {} {Missing second trail byte} + utf-8 ED81 replace \uFFFD -1 {knownW3C} {Missing second trail byte} + utf-8 ED81 strict {} 0 {} {Missing second trail byte} + utf-8 EDBF tcl8 \u00ED\u00BF -1 {} {Missing second trail byte} + utf-8 EDBF replace \uFFFD -1 {knownW3C} {Missing second trail byte} + utf-8 EDBF strict {} 0 {} {Missing second trail byte} + utf-8 ED807F tcl8 \u00ED\u20AC\x7F -1 {} {Second trail byte must be 80:BF} + utf-8 ED807F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} + utf-8 ED807F strict {} 0 {} {Second trail byte must be 80:BF} + utf-8 ED9F7F tcl8 \u00ED\u0178\x7F -1 {} {Second trail byte must be 80:BF} + utf-8 ED9F7F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} + utf-8 ED9F7F strict {} 0 {} {Second trail byte must be 80:BF} + utf-8 EDA080 tcl8 \uD800 -1 {} {High surrogate} + utf-8 EDA080 replace \uFFFD -1 {} {High surrogate} + utf-8 EDA080 strict {} 0 {} {High surrogate} + utf-8 EDAFBF tcl8 \uDBFF -1 {} {High surrogate} + utf-8 EDAFBF replace \uFFFD -1 {} {High surrogate} + utf-8 EDAFBF strict {} 0 {} {High surrogate} + utf-8 EDB080 tcl8 \uDC00 -1 {} {Low surrogate} + utf-8 EDB080 replace \uFFFD -1 {} {Low surrogate} + utf-8 EDB080 strict {} 0 {} {Low surrogate} + utf-8 EDBFBF tcl8 \uDFFF -1 {} {Low surrogate} + utf-8 EDBFBF replace \uFFFD -1 {} {Low surrogate} + utf-8 EDBFBF strict {} 0 {} {Low surrogate} + utf-8 EDA080EDB080 tcl8 \U00010000 -1 {} {High low surrogate pair} + utf-8 EDA080EDB080 replace \uFFFD\uFFFD -1 {} {High low surrogate pair} + utf-8 EDA080EDB080 strict {} 0 {} {High low surrogate pair} + utf-8 EDAFBFEDBFBF tcl8 \U0010FFFF -1 {} {High low surrogate pair} + utf-8 EDAFBFEDBFBF replace \uFFFD\uFFFD -1 {} {High low surrogate pair} + utf-8 EDAFBFEDBFBF strict {} 0 {} {High low surrogate pair} + + utf-8 EE tcl8 \u00EE -1 {} {Missing trail byte} + utf-8 EE replace \uFFFD -1 {} {Missing trail byte} + utf-8 EE strict {} 0 {} {Missing trail byte} + utf-8 EE7F tcl8 \u00EE\u7F -1 {} {First trail byte must be 80:BF} + utf-8 EE7F replace \uFFFD\u7F -1 {} {First trail byte must be 80:BF} + utf-8 EE7F strict {} 0 {} {First trail byte must be 80:BF} + utf-8 EED0 tcl8 \u00EE\u00D0 -1 {} {First trail byte must be 80:BF} + utf-8 EED0 replace \uFFFD\uFFFD -1 {} {First trail byte must be 80:BF} + utf-8 EED0 strict {} 0 {} {First trail byte must be 80:BF} + utf-8 EE81 tcl8 \u00EE\u0081 -1 {} {Missing second trail byte} + utf-8 EE81 replace \uFFFD -1 {knownW3C} {Missing second trail byte} + utf-8 EE81 strict {} 0 {} {Missing second trail byte} + utf-8 EEBF tcl8 \u00EE\u00BF -1 {} {Missing second trail byte} + utf-8 EEBF replace \uFFFD -1 {knownW3C} {Missing second trail byte} + utf-8 EEBF strict {} 0 {} {Missing second trail byte} + utf-8 EE807F tcl8 \u00EE\u20AC\x7F -1 {} {Second trail byte must be 80:BF} + utf-8 EE807F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} + utf-8 EE807F strict {} 0 {} {Second trail byte must be 80:BF} + utf-8 EEBF7F tcl8 \u00EE\u00BF\x7F -1 {} {Second trail byte must be 80:BF} + utf-8 EEBF7F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} + utf-8 EEBF7F strict {} 0 {} {Second trail byte must be 80:BF} + utf-8 EF tcl8 \u00EF -1 {} {Missing trail byte} + utf-8 EF replace \uFFFD -1 {} {Missing trail byte} + utf-8 EF strict {} 0 {} {Missing trail byte} + utf-8 EF7F tcl8 \u00EF\u7F -1 {} {First trail byte must be 80:BF} + utf-8 EF7F replace \uFFFD\u7F -1 {} {First trail byte must be 80:BF} + utf-8 EF7F strict {} 0 {} {First trail byte must be 80:BF} + utf-8 EFD0 tcl8 \u00EF\u00D0 -1 {} {First trail byte must be 80:BF} + utf-8 EFD0 replace \uFFFD\uFFFD -1 {} {First trail byte must be 80:BF} + utf-8 EFD0 strict {} 0 {} {First trail byte must be 80:BF} + utf-8 EF81 tcl8 \u00EF\u0081 -1 {} {Missing second trail byte} + utf-8 EF81 replace \uFFFD -1 {knownW3C} {Missing second trail byte} + utf-8 EF81 strict {} 0 {} {Missing second trail byte} + utf-8 EFBF tcl8 \u00EF\u00BF -1 {} {Missing second trail byte} + utf-8 EFBF replace \uFFFD -1 {knownW3C} {Missing second trail byte} + utf-8 EFBF strict {} 0 {} {Missing second trail byte} + utf-8 EF807F tcl8 \u00EF\u20AC\x7F -1 {} {Second trail byte must be 80:BF} + utf-8 EF807F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} + utf-8 EF807F strict {} 0 {} {Second trail byte must be 80:BF} + utf-8 EFBF7F tcl8 \u00EF\u00BF\x7F -1 {} {Second trail byte must be 80:BF} + utf-8 EFBF7F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} + utf-8 EFBF7F strict {} 0 {} {Second trail byte must be 80:BF} + + utf-8 F0 tcl8 \u00F0 -1 {} {Missing trail byte} + utf-8 F0 replace \uFFFD -1 {} {Missing trail byte} + utf-8 F0 strict {} 0 {} {Missing trail byte} + utf-8 F080 tcl8 \u00F0\u20AC -1 {} {First trail byte must be 90:BF} + utf-8 F080 replace \uFFFD -1 {knownW3C} {First trail byte must be 90:BF} + utf-8 F080 strict {} 0 {} {First trail byte must be 90:BF} + utf-8 F08F tcl8 \u00F0\u8F -1 {} {First trail byte must be 90:BF} + utf-8 F08F replace \uFFFD -1 {knownW3C} {First trail byte must be 90:BF} + utf-8 F08F strict {} 0 {} {First trail byte must be 90:BF} + utf-8 F0D0 tcl8 \u00F0\u00D0 -1 {} {First trail byte must be 90:BF} + utf-8 F0D0 replace \uFFFD\uFFFD -1 {} {First trail byte must be 90:BF} + utf-8 F0D0 strict {} 0 {} {First trail byte must be 90:BF} + utf-8 F090 tcl8 \u00F0\u0090 -1 {} {Missing second trail byte} + utf-8 F090 replace \uFFFD -1 {knownW3C} {Missing second trail byte} + utf-8 F090 strict {} 0 {} {Missing second trail byte} + utf-8 F0BF tcl8 \u00F0\u00BF -1 {} {Missing second trail byte} + utf-8 F0BF replace \uFFFD -1 {knownW3C} {Missing second trail byte} + utf-8 F0BF strict {} 0 {} {Missing second trail byte} + utf-8 F0907F tcl8 \u00F0\u0090\x7F -1 {} {Second trail byte must be 80:BF} + utf-8 F0907F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} + utf-8 F0907F strict {} 0 {} {Second trail byte must be 80:BF} + utf-8 F0BF7F tcl8 \u00F0\u00BF\x7F -1 {} {Second trail byte must be 80:BF} + utf-8 F0BF7F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} + utf-8 F0BF7F strict {} 0 {} {Second trail byte must be 80:BF} + utf-8 F090BF tcl8 \u00F0\u0090\u00BF -1 {} {Missing third trail byte} + utf-8 F090BF replace \uFFFD -1 {knownW3C} {Missing third trail byte} + utf-8 F090BF strict {} 0 {} {Missing third trail byte} + utf-8 F0BF81 tcl8 \u00F0\u00BF\u0081 -1 {} {Missing third trail byte} + utf-8 F0BF81 replace \uFFFD -1 {knownW3C} {Missing third trail byte} + utf-8 F0BF81 strict {} 0 {} {Missing third trail byte} + utf-8 F0BF807F tcl8 \u00F0\u00BF\u20AC\x7F -1 {} {Third trail byte must be 80:BF} + utf-8 F0BF817F replace \uFFFD\x7F -1 {knownW3C} {Third trail byte must be 80:BF} + utf-8 F0BF817F strict {} 0 {} {Third trail byte must be 80:BF} + utf-8 F090BFD0 tcl8 \u00F0\u0090\u00BF\u00D0 -1 {} {Third trail byte must be 80:BF} + utf-8 F090BFD0 replace \uFFFD -1 {knownW3C} {Third trail byte must be 80:BF} + utf-8 F090BFD0 strict {} 0 {} {Third trail byte must be 80:BF} + + utf-8 F1 tcl8 \u00F1 -1 {} {Missing trail byte} + utf-8 F1 replace \uFFFD -1 {} {Missing trail byte} + utf-8 F1 strict {} 0 {} {Missing trail byte} + utf-8 F17F tcl8 \u00F1\u7F -1 {} {First trail byte must be 80:BF} + utf-8 F17F replace \uFFFD -1 {knownW3C} {First trail byte must be 80:BF} + utf-8 F17F strict {} 0 {} {First trail byte must be 80:BF} + utf-8 F1D0 tcl8 \u00F1\u00D0 -1 {} {First trail byte must be 80:BF} + utf-8 F1D0 replace \uFFFD\uFFFD -1 {} {First trail byte must be 80:BF} + utf-8 F1D0 strict {} 0 {} {First trail byte must be 80:BF} + utf-8 F180 tcl8 \u00F1\u20AC -1 {} {Missing second trail byte} + utf-8 F180 replace \uFFFD -1 {knownW3C} {Missing second trail byte} + utf-8 F180 strict {} 0 {} {Missing second trail byte} + utf-8 F1BF tcl8 \u00F1\u00BF -1 {} {Missing second trail byte} + utf-8 F1BF replace \uFFFD -1 {knownW3C} {Missing second trail byte} + utf-8 F1BF strict {} 0 {} {Missing second trail byte} + utf-8 F1807F tcl8 \u00F1\u20AC\x7F -1 {} {Second trail byte must be 80:BF} + utf-8 F1807F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} + utf-8 F1807F strict {} 0 {} {Second trail byte must be 80:BF} + utf-8 F1BF7F tcl8 \u00F1\u00BF\x7F -1 {} {Second trail byte must be 80:BF} + utf-8 F1BF7F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} + utf-8 F1BF7F strict {} 0 {} {Second trail byte must be 80:BF} + utf-8 F180BF tcl8 \u00F1\u20AC\u00BF -1 {} {Missing third trail byte} + utf-8 F180BF replace \uFFFD -1 {knownW3C} {Missing third trail byte} + utf-8 F180BF strict {} 0 {} {Missing third trail byte} + utf-8 F1BF81 tcl8 \u00F1\u00BF\u0081 -1 {} {Missing third trail byte} + utf-8 F1BF81 replace \uFFFD -1 {knownW3C} {Missing third trail byte} + utf-8 F1BF81 strict {} 0 {} {Missing third trail byte} + utf-8 F1BF807F tcl8 \u00F1\u00BF\u20AC\x7F -1 {} {Third trail byte must be 80:BF} + utf-8 F1BF817F replace \uFFFD\x7F -1 {knownW3C} {Third trail byte must be 80:BF} + utf-8 F1BF817F strict {} 0 {} {Third trail byte must be 80:BF} + utf-8 F180BFD0 tcl8 \u00F1\u20AC\u00BF\u00D0 -1 {} {Third trail byte must be 80:BF} + utf-8 F180BFD0 replace \uFFFD -1 {knownW3C} {Third trail byte must be 80:BF} + utf-8 F180BFD0 strict {} 0 {} {Third trail byte must be 80:BF} + utf-8 F3 tcl8 \u00F3 -1 {} {Missing trail byte} + utf-8 F3 replace \uFFFD -1 {} {Missing trail byte} + utf-8 F3 strict {} 0 {} {Missing trail byte} + utf-8 F37F tcl8 \u00F3\x7F -1 {} {First trail byte must be 80:BF} + utf-8 F37F replace \uFFFD -1 {knownW3C} {First trail byte must be 80:BF} + utf-8 F37F strict {} 0 {} {First trail byte must be 80:BF} + utf-8 F3D0 tcl8 \u00F3\u00D0 -1 {} {First trail byte must be 80:BF} + utf-8 F3D0 replace \uFFFD\uFFFD -1 {} {First trail byte must be 80:BF} + utf-8 F3D0 strict {} 0 {} {First trail byte must be 80:BF} + utf-8 F380 tcl8 \u00F3\u20AC -1 {} {Missing second trail byte} + utf-8 F380 replace \uFFFD -1 {knownW3C} {Missing second trail byte} + utf-8 F380 strict {} 0 {} {Missing second trail byte} + utf-8 F3BF tcl8 \u00F3\u00BF -1 {} {Missing second trail byte} + utf-8 F3BF replace \uFFFD -1 {knownW3C} {Missing second trail byte} + utf-8 F3BF strict {} 0 {} {Missing second trail byte} + utf-8 F3807F tcl8 \u00F3\u20AC\x7F -1 {} {Second trail byte must be 80:BF} + utf-8 F3807F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} + utf-8 F3807F strict {} 0 {} {Second trail byte must be 80:BF} + utf-8 F3BF7F tcl8 \u00F3\u00BF\x7F -1 {} {Second trail byte must be 80:BF} + utf-8 F3BF7F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} + utf-8 F3BF7F strict {} 0 {} {Second trail byte must be 80:BF} + utf-8 F380BF tcl8 \u00F3\u20AC\u00BF -1 {} {Missing third trail byte} + utf-8 F380BF replace \uFFFD -1 {knownW3C} {Missing third trail byte} + utf-8 F380BF strict {} 0 {} {Missing third trail byte} + utf-8 F3BF81 tcl8 \u00F3\u00BF\u0081 -1 {} {Missing third trail byte} + utf-8 F3BF81 replace \uFFFD -1 {knownW3C} {Missing third trail byte} + utf-8 F3BF81 strict {} 0 {} {Missing third trail byte} + utf-8 F3BF807F tcl8 \u00F3\u00BF\u20AC\x7F -1 {} {Third trail byte must be 80:BF} + utf-8 F3BF817F replace \uFFFD\x7F -1 {knownW3C} {Third trail byte must be 80:BF} + utf-8 F3BF817F strict {} 0 {} {Third trail byte must be 80:BF} + utf-8 F380BFD0 tcl8 \u00F3\u20AC\u00BF\u00D0 -1 {} {Third trail byte must be 80:BF} + utf-8 F380BFD0 replace \uFFFD -1 {knownW3C} {Third trail byte must be 80:BF} + utf-8 F380BFD0 strict {} 0 {} {Third trail byte must be 80:BF} + + utf-8 F4 tcl8 \u00F4 -1 {} {Missing trail byte} + utf-8 F4 replace \uFFFD -1 {} {Missing trail byte} + utf-8 F4 strict {} 0 {} {Missing trail byte} + utf-8 F47F tcl8 \u00F4\u7F -1 {} {First trail byte must be 80:8F} + utf-8 F47F replace \uFFFD\u7F -1 {knownW3C} {First trail byte must be 80:8F} + utf-8 F47F strict {} 0 {} {First trail byte must be 80:8F} + utf-8 F490 tcl8 \u00F4\u0090 -1 {} {First trail byte must be 80:8F} + utf-8 F490 replace \uFFFD\uFFFD -1 {} {First trail byte must be 80:8F} + utf-8 F490 strict {} 0 {} {First trail byte must be 80:8F} + utf-8 F480 tcl8 \u00F4\u20AC -1 {} {Missing second trail byte} + utf-8 F480 replace \uFFFD -1 {knownW3C} {Missing second trail byte} + utf-8 F480 strict {} 0 {} {Missing second trail byte} + utf-8 F48F tcl8 \u00F4\u008F -1 {} {Missing second trail byte} + utf-8 F48F replace \uFFFD -1 {knownW3C} {Missing second trail byte} + utf-8 F48F strict {} 0 {} {Missing second trail byte} + utf-8 F4807F tcl8 \u00F4\u20AC\x7F -1 {} {Second trail byte must be 80:BF} + utf-8 F4807F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} + utf-8 F4807F strict {} 0 {} {Second trail byte must be 80:BF} + utf-8 F48F7F tcl8 \u00F4\u008F\x7F -1 {} {Second trail byte must be 80:BF} + utf-8 F48F7F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} + utf-8 F48F7F strict {} 0 {} {Second trail byte must be 80:BF} + utf-8 F48081 tcl8 \u00F4\u20AC\u0081 -1 {} {Missing third trail byte} + utf-8 F48081 replace \uFFFD -1 {knownW3C} {Missing third trail byte} + utf-8 F48081 strict {} 0 {} {Missing third trail byte} + utf-8 F48F81 tcl8 \u00F4\u008F\u0081 -1 {} {Missing third trail byte} + utf-8 F48F81 replace \uFFFD -1 {knownW3C} {Missing third trail byte} + utf-8 F48F81 strict {} 0 {} {Missing third trail byte} + utf-8 F481817F tcl8 \u00F4\u0081\u0081\x7F -1 {} {Third trail byte must be 80:BF} + utf-8 F480817F replace \uFFFD\x7F -1 {knownW3C} {Third trail byte must be 80:BF} + utf-8 F480817F strict {} 0 {} {Third trail byte must be 80:BF} + utf-8 F48FBFD0 tcl8 \u00F4\u008F\u00BF\u00D0 -1 {} {Third trail byte must be 80:BF} + utf-8 F48FBFD0 replace \uFFFD -1 {knownW3C} {Third trail byte must be 80:BF} + utf-8 F48FBFD0 strict {} 0 {} {Third trail byte must be 80:BF} + + utf-8 F5 tcl8 \u00F5 -1 {} {F5:FF are invalid everywhere} + utf-8 F5 replace \uFFFD -1 {} {F5:FF are invalid everywhere} + utf-8 F5 strict {} 0 {} {F5:FF are invalid everywhere} + utf-8 FF tcl8 \u00FF -1 {} {F5:FF are invalid everywhere} + utf-8 FF replace \uFFFD -1 {} {F5:FF are invalid everywhere} + utf-8 FF strict {} 0 {} {F5:FF are invalid everywhere} + + utf-8 C0AFE080BFF0818130 replace \uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\x30 -1 {} {Unicode Table 3-8} + utf-8 EDA080EDBFBFEDAF30 replace \uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\x30 -1 {knownW3C} {Unicode Table 3-9} + utf-8 F4919293FF4180BF30 replace \uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\u0041\uFFFD\uFFFD\x30 -1 {} {Unicode Table 3-10} + utf-8 E180E2F09192F1BF30 replace \uFFFD\uFFFD\uFFFD\uFFFD\x30 -1 {knownW3C} {Unicode Table 3.11} +} + +# utf16-le and utf16-be test cases. Note utf16 cases are automatically generated +# based on these depending on platform endianness. Note truncated tests can only +# happen when the sequence is at the end (including by itself) Thus {solo tail} +# in some cases. +lappend encInvalidBytes {*}{ + utf-16le 41 tcl8 \uFFFD -1 {solo tail} {Truncated} + utf-16le 41 replace \uFFFD -1 {solo tail} {Truncated} + utf-16le 41 strict {} 0 {solo tail} {Truncated} + utf-16le 00D8 tcl8 \uD800 -1 {} {Missing low surrogate} + utf-16le 00D8 replace \uFFFD -1 {knownBug} {Missing low surrogate} + utf-16le 00D8 strict {} 0 {knownBug} {Missing low surrogate} + utf-16le 00DC tcl8 \uDC00 -1 {} {Missing high surrogate} + utf-16le 00DC replace \uFFFD -1 {knownBug} {Missing high surrogate} + utf-16le 00DC strict {} 0 {knownBug} {Missing high surrogate} + + utf-16be 41 tcl8 \uFFFD -1 {solo tail} {Truncated} + utf-16be 41 replace \uFFFD -1 {solo tail} {Truncated} + utf-16be 41 strict {} 0 {solo tail} {Truncated} + utf-16be D800 tcl8 \uD800 -1 {} {Missing low surrogate} + utf-16be D800 replace \uFFFD -1 {knownBug} {Missing low surrogate} + utf-16be D800 strict {} 0 {knownBug} {Missing low surrogate} + utf-16be DC00 tcl8 \uDC00 -1 {} {Missing high surrogate} + utf-16be DC00 replace \uFFFD -1 {knownBug} {Missing high surrogate} + utf-16be DC00 strict {} 0 {knownBug} {Missing high surrogate} +} + +# utf32-le and utf32-be test cases. Note utf32 cases are automatically generated +# based on these depending on platform endianness. Note truncated tests can only +# happen when the sequence is at the end (including by itself) Thus {solo tail} +# in some cases. +lappend encInvalidBytes {*}{ + utf-32le 41 tcl8 \uFFFD -1 {solo tail} {Truncated} + utf-32le 41 replace \uFFFD -1 {solo} {Truncated} + utf-32le 41 strict {} 0 {solo tail} {Truncated} + utf-32le 4100 tcl8 \uFFFD -1 {solo tail} {Truncated} + utf-32le 4100 replace \uFFFD -1 {solo} {Truncated} + utf-32le 4100 strict {} 0 {solo tail} {Truncated} + utf-32le 410000 tcl8 \uFFFD -1 {solo tail} {Truncated} + utf-32le 410000 replace \uFFFD -1 {solo} {Truncated} + utf-32le 410000 strict {} 0 {solo tail} {Truncated} + utf-32le 00D80000 tcl8 \uD800 -1 {} {High-surrogate} + utf-32le 00D80000 replace \uFFFD -1 {} {High-surrogate} + utf-32le 00D80000 strict {} 0 {} {High-surrogate} + utf-32le 00DC0000 tcl8 \uDC00 -1 {} {Low-surrogate} + utf-32le 00DC0000 replace \uFFFD -1 {} {Low-surrogate} + utf-32le 00DC0000 strict {} 0 {} {Low-surrogate} + utf-32le 00D8000000DC0000 tcl8 \uD800\uDC00 -1 {} {High-low-surrogate-pair} + utf-32le 00D8000000DC0000 replace \uFFFD\uFFFD -1 {} {High-low-surrogate-pair} + utf-32le 00D8000000DC0000 strict {} 0 {} {High-low-surrogate-pair} + utf-32le 00001100 tcl8 \UFFFD -1 {} {Out of range} + utf-32le 00001100 replace \UFFFD -1 {} {Out of range} + utf-32le 00001100 strict {} 0 {} {Out of range} + utf-32le FFFFFFFF tcl8 \UFFFD -1 {} {Out of range} + utf-32le FFFFFFFF replace \UFFFD -1 {} {Out of range} + utf-32le FFFFFFFF strict {} 0 {} {Out of range} + + utf-32be 41 tcl8 \uFFFD -1 {solo tail} {Truncated} + utf-32be 41 replace \uFFFD -1 {solo tail} {Truncated} + utf-32be 41 strict {} 0 {solo tail} {Truncated} + utf-32be 0041 tcl8 \uFFFD -1 {solo tail} {Truncated} + utf-32be 0041 replace \uFFFD -1 {solo} {Truncated} + utf-32be 0041 strict {} 0 {solo tail} {Truncated} + utf-32be 000041 tcl8 \uFFFD -1 {solo tail} {Truncated} + utf-32be 000041 replace \uFFFD -1 {solo} {Truncated} + utf-32be 000041 strict {} 0 {solo tail} {Truncated} + utf-32be 0000D800 tcl8 \uD800 -1 {} {High-surrogate} + utf-32be 0000D800 replace \uFFFD -1 {} {High-surrogate} + utf-32be 0000D800 strict {} 0 {} {High-surrogate} + utf-32be 0000DC00 tcl8 \uDC00 -1 {} {Low-surrogate} + utf-32be 0000DC00 replace \uFFFD -1 {} {Low-surrogate} + utf-32be 0000DC00 strict {} 0 {} {Low-surrogate} + utf-32be 0000D8000000DC00 tcl8 \uD800\uDC00 -1 {} {High-low-surrogate-pair} + utf-32be 0000D8000000DC00 replace \uFFFD\uFFFD -1 {} {High-low-surrogate-pair} + utf-32be 0000D8000000DC00 strict {} 0 {} {High-low-surrogate-pair} + utf-32be 00110000 tcl8 \UFFFD -1 {} {Out of range} + utf-32be 00110000 replace \UFFFD -1 {} {Out of range} + utf-32be 00110000 strict {} 0 {} {Out of range} + utf-32be FFFFFFFF tcl8 \UFFFD -1 {} {Out of range} + utf-32be FFFFFFFF replace \UFFFD -1 {} {Out of range} + utf-32be FFFFFFFF strict {} 0 {} {Out of range} +} + +# Strings that cannot be encoded for specific encoding / profiles +# +# should be unique for test ids to be unique. +# See earlier comments about CTRL field. +# +# Note utf-16, utf-32 missing because they are automatically +# generated based on le/be versions. +# TODO - out of range code point (note cannot be generated by \U notation) +lappend encUnencodableStrings {*}{ + ascii \u00e0 tcl8 3f -1 {} {unencodable} + ascii \u00e0 strict {} 0 {} {unencodable} + + iso8859-1 \u0141 tcl8 3f -1 {} unencodable + iso8859-1 \u0141 strict {} 0 {} unencodable + + utf-8 \uD800 tcl8 eda080 -1 {} High-surrogate + utf-8 \uD800 strict {} 0 {} High-surrogate + utf-8 \uDC00 tcl8 edb080 -1 {} High-surrogate + utf-8 \uDC00 strict {} 0 {} High-surrogate +} + + +# The icuUcmTests.tcl is generated by the tools/ucm2tests.tcl script +# and generates test vectors for the above tables for various encodings +# based on ICU UCM files. +# TODO - commented out for now as generating a lot of mismatches. +# source [file join [file dirname [info script]] icuUcmTests.tcl] -- cgit v0.12 From 99a24e7883c680bb555d044a04e458a57be677a1 Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Fri, 24 Feb 2023 10:32:37 +0000 Subject: Raise error on invalid flags --- generic/tclEncoding.c | 24 +++++++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index d969779..00ca5e8 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -1275,7 +1275,18 @@ Tcl_ExternalToUtfDStringEx( Tcl_Size dstLen; const char *srcStart = src; - Tcl_DStringInit(dstPtr); /* Must always be initialized before returning */ + /* DO FIRST - Must always be initialized before returning */ + Tcl_DStringInit(dstPtr); + + if (flags & (TCL_ENCODING_START|TCL_ENCODING_END)) { + /* TODO - what other flags are illegal? - See TIP 656 */ + Tcl_SetResult(interp, + "Parameter error: TCL_ENCODING_{START,STOP} bits set in flags.", + TCL_STATIC); + Tcl_SetErrorCode(interp, "TCL", "ENCODING", "ILLEGALFLAGS", NULL); + return TCL_ERROR; + } + dst = Tcl_DStringValue(dstPtr); dstLen = dstPtr->spaceAvl - 1; @@ -1559,7 +1570,18 @@ Tcl_UtfToExternalDStringEx( const char *srcStart = src; Tcl_Size dstLen; + /* DO FIRST - must always be initialized on return */ Tcl_DStringInit(dstPtr); + + if (flags & (TCL_ENCODING_START|TCL_ENCODING_END)) { + /* TODO - what other flags are illegal? - See TIP 656 */ + Tcl_SetResult(interp, + "Parameter error: TCL_ENCODING_{START,STOP} bits set in flags.", + TCL_STATIC); + Tcl_SetErrorCode(interp, "TCL", "ENCODING", "ILLEGALFLAGS", NULL); + return TCL_ERROR; + } + dst = Tcl_DStringValue(dstPtr); dstLen = dstPtr->spaceAvl - 1; -- cgit v0.12 From 58db3d68eb1d0fba5c0e0b3ffff602acbfb2a12a Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Fri, 24 Feb 2023 13:34:15 +0000 Subject: Add teststringobj newunicode command to test invalid input to Tcl_NewUnicodeObj --- generic/tclTestObj.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/generic/tclTestObj.c b/generic/tclTestObj.c index c9a910a..fa91d67 100644 --- a/generic/tclTestObj.c +++ b/generic/tclTestObj.c @@ -1269,7 +1269,7 @@ TeststringobjCmd( static const char *const options[] = { "append", "appendstrings", "get", "get2", "length", "length2", "set", "set2", "setlength", "maxchars", "range", "appendself", - "appendself2", NULL + "appendself2", "newunicode", NULL }; if (objc < 3) { @@ -1513,7 +1513,24 @@ TeststringobjCmd( Tcl_AppendUnicodeToObj(varPtr[varIndex], unicode + length, size - length); Tcl_SetObjResult(interp, varPtr[varIndex]); break; - } + case 13: /* newunicode*/ + unicode = ckalloc((objc - 3) * sizeof(Tcl_UniChar)); + for (i = 0; i < (objc - 3); ++i) { + int val; + if (Tcl_GetIntFromObj(interp, objv[i + 3], &val) != TCL_OK) { + break; + } + unicode[i] = (Tcl_UniChar)val; + } + if (i < (objc-3)) { + ckfree(unicode); + return TCL_ERROR; + } + SetVarToObj(varPtr, varIndex, Tcl_NewUnicodeObj(unicode, objc - 3)); + Tcl_SetObjResult(interp, varPtr[varIndex]); + ckfree(unicode); + break; + } return TCL_OK; } -- cgit v0.12 -- cgit v0.12 From f0f2ee57f9f6423cc4fb56be376158f7e006739e Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Sat, 25 Feb 2023 03:03:46 +0000 Subject: Compiles and runs. Tests still to be ported. --- generic/tcl.decls | 14 +- generic/tcl.h | 28 ++- generic/tclCmdAH.c | 426 +++++++++++++++++--------------- generic/tclDecls.h | 28 ++- generic/tclEncoding.c | 666 ++++++++++++++++++++++++++++++++++++++------------ generic/tclIO.c | 149 ++++------- generic/tclIO.h | 5 - generic/tclInt.h | 14 ++ generic/tclUtil.c | 9 +- generic/tclZlib.c | 8 +- unix/tclUnixChan.c | 4 +- unix/tclUnixFCmd.c | 46 ++-- unix/tclUnixFile.c | 18 +- unix/tclUnixInit.c | 2 +- win/tclWinSock.c | 4 +- 15 files changed, 889 insertions(+), 532 deletions(-) diff --git a/generic/tcl.decls b/generic/tcl.decls index 3778de6..1608a88 100644 --- a/generic/tcl.decls +++ b/generic/tcl.decls @@ -2500,13 +2500,17 @@ declare 656 { declare 657 { int Tcl_UniCharIsUnicode(int ch) } + +# TIP 656 declare 658 { - Tcl_Size Tcl_ExternalToUtfDStringEx(Tcl_Encoding encoding, - const char *src, Tcl_Size srcLen, int flags, Tcl_DString *dsPtr) -} + int Tcl_ExternalToUtfDStringEx(Tcl_Interp *interp, Tcl_Encoding encoding, + const char *src, Tcl_Size srcLen, int flags, Tcl_DString *dsPtr, + Tcl_Size *errorLocationPtr) +} declare 659 { - Tcl_Size Tcl_UtfToExternalDStringEx(Tcl_Encoding encoding, - const char *src, Tcl_Size srcLen, int flags, Tcl_DString *dsPtr) + int Tcl_UtfToExternalDStringEx(Tcl_Interp *interp, Tcl_Encoding encoding, + const char *src, Tcl_Size srcLen, int flags, Tcl_DString *dsPtr, + Tcl_Size *errorLocationPtr) } # TIP #511 diff --git a/generic/tcl.h b/generic/tcl.h index fa4da26..6040099 100644 --- a/generic/tcl.h +++ b/generic/tcl.h @@ -1948,14 +1948,8 @@ typedef struct Tcl_EncodingType { * 0x00. Only valid for "utf-8" and "cesu-8". * This flag is implicit for external -> internal conversions, * optional for internal -> external conversions. - * TCL_ENCODING_NOCOMPLAIN - If set, the converter - * substitutes the problematic character(s) with - * one or more "close" characters in the - * destination buffer and then continues to - * convert the source. If clear, the converter returns - * immediately upon encountering an invalid byte sequence - * or a source character that has no mapping in the - * target encoding. Only for Tcl 9.x. + * TCL_ENCODING_PROFILE_* - Mutually exclusive encoding profile ids. Note + * these are bit masks. */ #define TCL_ENCODING_START 0x01 @@ -1970,7 +1964,23 @@ typedef struct Tcl_EncodingType { #define TCL_ENCODING_NO_TERMINATE 0x08 #define TCL_ENCODING_CHAR_LIMIT 0x10 #define TCL_ENCODING_MODIFIED 0x20 -#define TCL_ENCODING_NOCOMPLAIN 0x40 +/* Reserve top byte for profile values (disjoint) */ +#define TCL_ENCODING_PROFILE_TCL8 0x01000000 +#define TCL_ENCODING_PROFILE_STRICT 0x02000000 +#define TCL_ENCODING_PROFILE_REPLACE 0x03000000 +#define TCL_ENCODING_PROFILE_MASK 0xFF000000 +#define TCL_ENCODING_PROFILE_GET(flags_) ((flags_) & TCL_ENCODING_PROFILE_MASK) +#define TCL_ENCODING_PROFILE_SET(flags_, profile_) \ + do { \ + (flags_) &= ~TCL_ENCODING_PROFILE_MASK; \ + (flags_) |= profile_; \ + } while (0) +/* Still being argued - For Tcl9, is the default strict? TODO */ +#if TCL_MAJOR_VERSION < 9 +#define TCL_ENCODING_PROFILE_DEFAULT TCL_ENCODING_PROFILE_TCL8 +#else +#define TCL_ENCODING_PROFILE_DEFAULT TCL_ENCODING_PROFILE_TCL8 /* STRICT? REPLACE? TODO */ +#endif /* * The following definitions are the error codes returned by the conversion diff --git a/generic/tclCmdAH.c b/generic/tclCmdAH.c index 4df1216..c60a077 100644 --- a/generic/tclCmdAH.c +++ b/generic/tclCmdAH.c @@ -51,6 +51,7 @@ static Tcl_ObjCmdProc EncodingConvertfromObjCmd; static Tcl_ObjCmdProc EncodingConverttoObjCmd; static Tcl_ObjCmdProc EncodingDirsObjCmd; static Tcl_ObjCmdProc EncodingNamesObjCmd; +static Tcl_ObjCmdProc EncodingProfilesObjCmd; static Tcl_ObjCmdProc EncodingSystemObjCmd; static inline int ForeachAssignments(Tcl_Interp *interp, struct ForeachState *statePtr); @@ -386,6 +387,7 @@ TclInitEncodingCmd( {"convertto", EncodingConverttoObjCmd, TclCompileBasic1To3ArgCmd, NULL, NULL, 0}, {"dirs", EncodingDirsObjCmd, TclCompileBasic0Or1ArgCmd, NULL, NULL, 1}, {"names", EncodingNamesObjCmd, TclCompileBasic0ArgCmd, NULL, NULL, 0}, + {"profiles", EncodingProfilesObjCmd, TclCompileBasic0ArgCmd, NULL, NULL, 0}, {"system", EncodingSystemObjCmd, TclCompileBasic0Or1ArgCmd, NULL, NULL, 1}, {NULL, NULL, NULL, NULL, NULL, 0} }; @@ -394,6 +396,121 @@ TclInitEncodingCmd( } /* + *------------------------------------------------------------------------ + * + * EncodingConvertParseOptions -- + * + * Common routine for parsing arguments passed to encoding convertfrom + * and encoding convertto. + * + * Results: + * TCL_OK or TCL_ERROR. + * + * Side effects: + * On success, + * - *encPtr is set to the encoding. Must be freed with Tcl_FreeEncoding + * if non-NULL + * - *dataObjPtr is set to the Tcl_Obj containing the data to encode or + * decode + * - *profilePtr is set to encoding error handling profile + * - *failVarPtr is set to -failindex option value or NULL + * On error, all of the above are uninitialized. + * + *------------------------------------------------------------------------ + */ +static int +EncodingConvertParseOptions ( + Tcl_Interp *interp, /* For error messages. May be NULL */ + int objc, /* Number of arguments */ + Tcl_Obj *const objv[], /* Argument objects as passed to command. */ + Tcl_Encoding *encPtr, /* Where to store the encoding */ + Tcl_Obj **dataObjPtr, /* Where to store ptr to Tcl_Obj containing data */ + int *profilePtr, /* Bit mask of encoding option profile */ + Tcl_Obj **failVarPtr /* Where to store -failindex option value */ +) +{ + static const char *const options[] = {"-profile", "-failindex", NULL}; + enum convertfromOptions { PROFILE, FAILINDEX } optIndex; + Tcl_Encoding encoding; + Tcl_Obj *dataObj; + Tcl_Obj *failVarObj; +#if TCL_MAJOR_VERSION > 8 || defined(TCL_NO_DEPRECATED) + int profile = TCL_ENCODING_PROFILE_TCL8; /* TODO - default for Tcl9? */ +#else + int profile = TCL_ENCODING_PROFILE_TCL8; +#endif + + /* + * Possible combinations: + * 1) data -> objc = 2 + * 2) ?options? encoding data -> objc >= 3 + * It is intentional that specifying option forces encoding to be + * specified. Less prone to user error. This should have always been + * the case even in 8.6 imho where there were no options (ie (1) + * should never have been allowed) + */ + + if (objc == 1) { +numArgsError: /* ONLY jump here if nothing needs to be freed!!! */ + Tcl_WrongNumArgs(interp, + 1, + objv, + "?-profile profile? ?-failindex var? encoding data"); + ((Interp *)interp)->flags |= INTERP_ALTERNATE_WRONG_ARGS; + Tcl_WrongNumArgs(interp, 1, objv, "data"); + return TCL_ERROR; + } + + failVarObj = NULL; + if (objc == 2) { + encoding = Tcl_GetEncoding(interp, NULL); + dataObj = objv[1]; + } else { + int argIndex; + for (argIndex = 1; argIndex < (objc-2); ++argIndex) { + if (Tcl_GetIndexFromObj( + interp, objv[argIndex], options, "option", 0, &optIndex) + != TCL_OK) { + return TCL_ERROR; + } + if (++argIndex == (objc - 2)) { + goto numArgsError; + } + switch (optIndex) { + case PROFILE: + if (TclEncodingProfileNameToId( + interp, Tcl_GetString(objv[argIndex]), &profile) + != TCL_OK) { + return TCL_ERROR; + } +#ifdef NOTNEEDED + /* TODO - next line probably not needed as the conversion + functions already take care of mapping profile to flags */ + profile = TclEncodingExternalFlagsToInternal(profile); +#endif + break; + case FAILINDEX: + failVarObj = objv[argIndex]; + break; + } + } + /* Get encoding after opts so no need to free it on option error */ + if (Tcl_GetEncodingFromObj(interp, objv[objc - 2], &encoding) + != TCL_OK) { + return TCL_ERROR; + } + dataObj = objv[objc - 1]; + } + + *encPtr = encoding; + *dataObjPtr = dataObj; + *profilePtr = profile; + *failVarPtr = failVarObj; + + return TCL_OK; +} + +/* *---------------------------------------------------------------------- * * EncodingConvertfromObjCmd -- @@ -419,113 +536,65 @@ EncodingConvertfromObjCmd( Tcl_Encoding encoding; /* Encoding to use */ size_t length = 0; /* Length of the byte array being converted */ const char *bytesPtr; /* Pointer to the first byte of the array */ - int flags = 0; - size_t result; - Tcl_Obj *failVarObj = NULL; - /* - * Decode parameters: - * Possible combinations: - * 1) data -> objc = 2 - * 2) encoding data -> objc = 3 - * 3) -nocomplain data -> objc = 3 - * 4) -nocomplain encoding data -> objc = 4 - * 5) -strict data -> objc = 3 - * 6) -strict encoding data -> objc = 4 - * 7) -failindex val data -> objc = 4 - * 8) -failindex val encoding data -> objc = 5 - */ + int flags; + int result; + Tcl_Obj *failVarObj; + Tcl_Size errorLocation; - if (objc == 2) { - encoding = Tcl_GetEncoding(interp, NULL); - data = objv[1]; - } else if (objc > 2 && objc < 7) { - int objcUnprocessed = objc; - data = objv[objc - 1]; - bytesPtr = Tcl_GetString(objv[1]); - if (bytesPtr[0] == '-' && bytesPtr[1] == 'n' - && !strncmp(bytesPtr, "-nocomplain", strlen(bytesPtr))) { - flags = TCL_ENCODING_NOCOMPLAIN; - objcUnprocessed--; - } else if (bytesPtr[0] == '-' && bytesPtr[1] == 's' - && !strncmp(bytesPtr, "-strict", strlen(bytesPtr))) { - flags = TCL_ENCODING_STRICT; - objcUnprocessed--; - bytesPtr = Tcl_GetString(objv[2]); - if (bytesPtr[0] == '-' && bytesPtr[1] == 'f' - && !strncmp(bytesPtr, "-failindex", strlen(bytesPtr))) { - /* at least two additional arguments needed */ - if (objc < 6) { - goto encConvFromError; - } - failVarObj = objv[3]; - objcUnprocessed -= 2; - } - } else if (bytesPtr[0] == '-' && bytesPtr[1] == 'f' - && !strncmp(bytesPtr, "-failindex", strlen(bytesPtr))) { - /* at least two additional arguments needed */ - if (objc < 4) { - goto encConvFromError; - } - failVarObj = objv[2]; - flags = ENCODING_FAILINDEX; - objcUnprocessed -= 2; - bytesPtr = Tcl_GetString(objv[3]); - if (bytesPtr[0] == '-' && bytesPtr[1] == 's' - && !strncmp(bytesPtr, "-strict", strlen(bytesPtr))) { - flags = TCL_ENCODING_STRICT; - objcUnprocessed --; - } - } - switch (objcUnprocessed) { - case 3: - if (Tcl_GetEncodingFromObj(interp, objv[objc - 2], &encoding) != TCL_OK) { - return TCL_ERROR; - } - break; - case 2: - encoding = Tcl_GetEncoding(interp, NULL); - break; - default: - goto encConvFromError; - } - } else { - encConvFromError: - Tcl_WrongNumArgs(interp, 1, objv, "?-strict? ?-failindex var? ?encoding? data"); - ((Interp *) interp)->flags |= INTERP_ALTERNATE_WRONG_ARGS; - Tcl_WrongNumArgs(interp, 1, objv, "-nocomplain ?encoding? data"); + if (EncodingConvertParseOptions( + interp, objc, objv, &encoding, &data, &flags, &failVarObj) + != TCL_OK) { return TCL_ERROR; } /* - * Convert the string into a byte array in 'ds' + * Convert the string into a byte array in 'ds'. */ bytesPtr = (char *) Tcl_GetBytesFromObj(interp, data, &length); + if (bytesPtr == NULL) { return TCL_ERROR; } - result = Tcl_ExternalToUtfDStringEx(encoding, bytesPtr, length, - flags, &ds); - if ((!(flags & TCL_ENCODING_NOCOMPLAIN) || ((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT)) && (result != TCL_INDEX_NONE)) { - if (failVarObj != NULL) { - if (Tcl_ObjSetVar2(interp, failVarObj, NULL, Tcl_NewWideIntObj(result), TCL_LEAVE_ERR_MSG) == NULL) { - return TCL_ERROR; - } - } else { - char buf[TCL_INTEGER_SPACE]; - sprintf(buf, "%" TCL_Z_MODIFIER "u", result); - Tcl_SetObjResult(interp, Tcl_ObjPrintf("unexpected byte sequence starting at index %" - TCL_Z_MODIFIER "u: '\\x%X'", result, UCHAR(bytesPtr[result]))); - Tcl_SetErrorCode(interp, "TCL", "ENCODING", "ILLEGALSEQUENCE", - buf, NULL); - Tcl_DStringFree(&ds); + result = Tcl_ExternalToUtfDStringEx(interp, encoding, bytesPtr, length, flags, + &ds, failVarObj ? &errorLocation : NULL); + /* NOTE: ds must be freed beyond this point even on error */ + switch (result) { + case TCL_OK: + errorLocation = TCL_INDEX_NONE; + break; + case TCL_ERROR: + /* Error in parameters. Should not happen. interp will have error */ + Tcl_DStringFree(&ds); + return TCL_ERROR; + default: + /* + * One of the TCL_CONVERT_* errors. If we were not interested in the + * error location, interp result would already have been filled in + * and we can just return the error. Otherwise, we have to return + * what could be decoded and the returned error location. + */ + if (failVarObj == NULL) { + Tcl_DStringFree(&ds); return TCL_ERROR; } - } else if (failVarObj != NULL) { - if (Tcl_ObjSetVar2(interp, failVarObj, NULL, Tcl_NewIntObj(-1), TCL_LEAVE_ERR_MSG) == NULL) { + break; + } + + /* + * TCL_OK or a TCL_CONVERT_* error where the caller wants back as much + * data as was converted. + */ + if (failVarObj) { + /* I hope, wide int will cover Tcl_Size data type */ + if (Tcl_ObjSetVar2(interp, + failVarObj, + NULL, + Tcl_NewWideIntObj(errorLocation), + TCL_LEAVE_ERR_MSG) == NULL) { + Tcl_DStringFree(&ds); return TCL_ERROR; } } - /* * Note that we cannot use Tcl_DStringResult here because it will * truncate the string at the first null byte. @@ -533,9 +602,7 @@ EncodingConvertfromObjCmd( Tcl_SetObjResult(interp, Tcl_DStringToObj(&ds)); - /* - * We're done with the encoding - */ + /* We're done with the encoding */ Tcl_FreeEncoding(encoding); return TCL_OK; @@ -568,80 +635,14 @@ EncodingConverttoObjCmd( Tcl_Encoding encoding; /* Encoding to use */ size_t length; /* Length of the string being converted */ const char *stringPtr; /* Pointer to the first byte of the string */ - size_t result; - int flags = 0; - Tcl_Obj *failVarObj = NULL; - - /* - * Decode parameters: - * Possible combinations: - * 1) data -> objc = 2 - * 2) encoding data -> objc = 3 - * 3) -nocomplain data -> objc = 3 - * 4) -nocomplain encoding data -> objc = 4 - * 5) -failindex val data -> objc = 4 - * 6) -failindex val encoding data -> objc = 5 - */ - - if (objc == 2) { - encoding = Tcl_GetEncoding(interp, NULL); - data = objv[1]; - } else if (objc > 2 && objc < 7) { - int objcUnprocessed = objc; - data = objv[objc - 1]; - stringPtr = Tcl_GetString(objv[1]); - if (stringPtr[0] == '-' && stringPtr[1] == 'n' - && !strncmp(stringPtr, "-nocomplain", strlen(stringPtr))) { - flags = TCL_ENCODING_NOCOMPLAIN; - objcUnprocessed--; - } else if (stringPtr[0] == '-' && stringPtr[1] == 's' - && !strncmp(stringPtr, "-strict", strlen(stringPtr))) { - flags = TCL_ENCODING_STRICT; - objcUnprocessed--; - stringPtr = Tcl_GetString(objv[2]); - if (stringPtr[0] == '-' && stringPtr[1] == 'f' - && !strncmp(stringPtr, "-failindex", strlen(stringPtr))) { - /* at least two additional arguments needed */ - if (objc < 6) { - goto encConvToError; - } - failVarObj = objv[3]; - objcUnprocessed -= 2; - } - } else if (stringPtr[0] == '-' && stringPtr[1] == 'f' - && !strncmp(stringPtr, "-failindex", strlen(stringPtr))) { - /* at least two additional arguments needed */ - if (objc < 4) { - goto encConvToError; - } - failVarObj = objv[2]; - flags = TCL_ENCODING_STOPONERROR; - objcUnprocessed -= 2; - stringPtr = Tcl_GetString(objv[3]); - if (stringPtr[0] == '-' && stringPtr[1] == 's' - && !strncmp(stringPtr, "-strict", strlen(stringPtr))) { - flags = TCL_ENCODING_STRICT; - objcUnprocessed --; - } - } - switch (objcUnprocessed) { - case 3: - if (Tcl_GetEncodingFromObj(interp, objv[objc - 2], &encoding) != TCL_OK) { - return TCL_ERROR; - } - break; - case 2: - encoding = Tcl_GetEncoding(interp, NULL); - break; - default: - goto encConvToError; - } - } else { - encConvToError: - Tcl_WrongNumArgs(interp, 1, objv, "?-strict? ?-failindex var? ?encoding? data"); - ((Interp *) interp)->flags |= INTERP_ALTERNATE_WRONG_ARGS; - Tcl_WrongNumArgs(interp, 1, objv, "-nocomplain ?encoding? data"); + int result; + int flags; + Tcl_Obj *failVarObj; + Tcl_Size errorLocation; + if (EncodingConvertParseOptions( + interp, objc, objv, &encoding, &data, &flags, &failVarObj) + != TCL_OK) { return TCL_ERROR; } @@ -650,40 +651,53 @@ EncodingConverttoObjCmd( */ stringPtr = Tcl_GetStringFromObj(data, &length); - result = Tcl_UtfToExternalDStringEx(encoding, stringPtr, length, - flags, &ds); - if ((!(flags & TCL_ENCODING_NOCOMPLAIN) || ((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT)) && (result != TCL_INDEX_NONE)) { - if (failVarObj != NULL) { - /* I hope, wide int will cover size_t data type */ - if (Tcl_ObjSetVar2(interp, failVarObj, NULL, Tcl_NewWideIntObj(result), TCL_LEAVE_ERR_MSG) == NULL) { - return TCL_ERROR; - } - } else { - size_t pos = Tcl_NumUtfChars(stringPtr, result); - int ucs4; - char buf[TCL_INTEGER_SPACE]; - TclUtfToUCS4(&stringPtr[result], &ucs4); - sprintf(buf, "%" TCL_Z_MODIFIER "u", result); - Tcl_SetObjResult(interp, Tcl_ObjPrintf("unexpected character at index %" - TCL_Z_MODIFIER "u: 'U+%06X'", pos, ucs4)); - Tcl_SetErrorCode(interp, "TCL", "ENCODING", "ILLEGALSEQUENCE", - buf, NULL); - Tcl_DStringFree(&ds); + result = Tcl_UtfToExternalDStringEx(interp, encoding, stringPtr, length, flags, + &ds, failVarObj ? &errorLocation : NULL); + /* NOTE: ds must be freed beyond this point even on error */ + + switch (result) { + case TCL_OK: + errorLocation = TCL_INDEX_NONE; + break; + case TCL_ERROR: + /* Error in parameters. Should not happen. interp will have error */ + Tcl_DStringFree(&ds); + return TCL_ERROR; + default: + /* + * One of the TCL_CONVERT_* errors. If we were not interested in the + * error location, interp result would already have been filled in + * and we can just return the error. Otherwise, we have to return + * what could be decoded and the returned error location. + */ + if (failVarObj == NULL) { + Tcl_DStringFree(&ds); return TCL_ERROR; } - } else if (failVarObj != NULL) { - if (Tcl_ObjSetVar2(interp, failVarObj, NULL, Tcl_NewIntObj(-1), TCL_LEAVE_ERR_MSG) == NULL) { + break; + } + /* + * TCL_OK or a TCL_CONVERT_* error where the caller wants back as much + * data as was converted. + */ + if (failVarObj) { + /* I hope, wide int will cover Tcl_Size data type */ + if (Tcl_ObjSetVar2(interp, + failVarObj, + NULL, + Tcl_NewWideIntObj(errorLocation), + TCL_LEAVE_ERR_MSG) == NULL) { + Tcl_DStringFree(&ds); return TCL_ERROR; } } + Tcl_SetObjResult(interp, Tcl_NewByteArrayObj((unsigned char*) Tcl_DStringValue(&ds), Tcl_DStringLength(&ds))); Tcl_DStringFree(&ds); - /* - * We're done with the encoding - */ + /* We're done with the encoding */ Tcl_FreeEncoding(encoding); return TCL_OK; @@ -768,6 +782,34 @@ EncodingNamesObjCmd( /* *----------------------------------------------------------------------------- * + * EncodingProfilesObjCmd -- + * + * This command returns a list of the available encoding profiles + * + * Results: + * Returns a standard Tcl result + * + *----------------------------------------------------------------------------- + */ + +int +EncodingProfilesObjCmd( + TCL_UNUSED(void *), + Tcl_Interp* interp, /* Tcl interpreter */ + int objc, /* Number of command line args */ + Tcl_Obj* const objv[]) /* Vector of command line args */ +{ + if (objc > 1) { + Tcl_WrongNumArgs(interp, 1, objv, NULL); + return TCL_ERROR; + } + TclGetEncodingProfiles(interp); + return TCL_OK; +} + +/* + *----------------------------------------------------------------------------- + * * EncodingSystemObjCmd -- * * This command retrieves or changes the system encoding diff --git a/generic/tclDecls.h b/generic/tclDecls.h index f219500..bdc094d 100644 --- a/generic/tclDecls.h +++ b/generic/tclDecls.h @@ -1766,13 +1766,17 @@ EXTERN const char * Tcl_UtfPrev(const char *src, const char *start); /* 657 */ EXTERN int Tcl_UniCharIsUnicode(int ch); /* 658 */ -EXTERN Tcl_Size Tcl_ExternalToUtfDStringEx(Tcl_Encoding encoding, - const char *src, Tcl_Size srcLen, int flags, - Tcl_DString *dsPtr); +EXTERN int Tcl_ExternalToUtfDStringEx(Tcl_Interp *interp, + Tcl_Encoding encoding, const char *src, + Tcl_Size srcLen, int flags, + Tcl_DString *dsPtr, + Tcl_Size *errorLocationPtr); /* 659 */ -EXTERN Tcl_Size Tcl_UtfToExternalDStringEx(Tcl_Encoding encoding, - const char *src, Tcl_Size srcLen, int flags, - Tcl_DString *dsPtr); +EXTERN int Tcl_UtfToExternalDStringEx(Tcl_Interp *interp, + Tcl_Encoding encoding, const char *src, + Tcl_Size srcLen, int flags, + Tcl_DString *dsPtr, + Tcl_Size *errorLocationPtr); /* 660 */ EXTERN int Tcl_AsyncMarkFromSignal(Tcl_AsyncHandler async, int sigNumber); @@ -2529,8 +2533,8 @@ typedef struct TclStubs { const char * (*tcl_UtfNext) (const char *src); /* 655 */ const char * (*tcl_UtfPrev) (const char *src, const char *start); /* 656 */ int (*tcl_UniCharIsUnicode) (int ch); /* 657 */ - Tcl_Size (*tcl_ExternalToUtfDStringEx) (Tcl_Encoding encoding, const char *src, Tcl_Size srcLen, int flags, Tcl_DString *dsPtr); /* 658 */ - Tcl_Size (*tcl_UtfToExternalDStringEx) (Tcl_Encoding encoding, const char *src, Tcl_Size srcLen, int flags, Tcl_DString *dsPtr); /* 659 */ + int (*tcl_ExternalToUtfDStringEx) (Tcl_Interp *interp, Tcl_Encoding encoding, const char *src, Tcl_Size srcLen, int flags, Tcl_DString *dsPtr, Tcl_Size *errorLocationPtr); /* 658 */ + int (*tcl_UtfToExternalDStringEx) (Tcl_Interp *interp, Tcl_Encoding encoding, const char *src, Tcl_Size srcLen, int flags, Tcl_DString *dsPtr, Tcl_Size *errorLocationPtr); /* 659 */ int (*tcl_AsyncMarkFromSignal) (Tcl_AsyncHandler async, int sigNumber); /* 660 */ int (*tcl_ListObjGetElements) (Tcl_Interp *interp, Tcl_Obj *listPtr, size_t *objcPtr, Tcl_Obj ***objvPtr); /* 661 */ int (*tcl_ListObjLength) (Tcl_Interp *interp, Tcl_Obj *listPtr, size_t *lengthPtr); /* 662 */ @@ -3956,12 +3960,12 @@ extern const TclStubs *tclStubsPtr; #undef Tcl_UtfToExternalDString #define Tcl_UtfToExternalDString(encoding, src, len, ds) \ - (Tcl_UtfToExternalDStringEx((encoding), (src), (len), \ - TCL_ENCODING_NOCOMPLAIN, (ds)), Tcl_DStringValue(ds)) + (Tcl_UtfToExternalDStringEx(NULL, (encoding), (src), (len), \ + TCL_ENCODING_PROFILE_TCL8, (ds), NULL), Tcl_DStringValue(ds)) #undef Tcl_ExternalToUtfDString #define Tcl_ExternalToUtfDString(encoding, src, len, ds) \ - (Tcl_ExternalToUtfDStringEx((encoding), (src), (len), \ - TCL_ENCODING_NOCOMPLAIN, (ds)), Tcl_DStringValue(ds)) + (Tcl_ExternalToUtfDStringEx(NULL, (encoding), (src), (len), \ + TCL_ENCODING_PROFILE_TCL8, (ds), NULL), Tcl_DStringValue(ds)) #if defined(USE_TCL_STUBS) # if defined(_WIN32) && defined(_WIN64) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index ce5626f..68f22b0 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -188,6 +188,32 @@ static Tcl_Encoding systemEncoding = NULL; Tcl_Encoding tclIdentityEncoding = NULL; /* + * Names of encoding profiles and corresponding integer values + */ +static struct TclEncodingProfiles { + const char *name; + int value; +} encodingProfiles[] = { + {"tcl8", TCL_ENCODING_PROFILE_TCL8}, + {"strict", TCL_ENCODING_PROFILE_STRICT}, + {"replace", TCL_ENCODING_PROFILE_REPLACE}, +}; +#define PROFILE_STRICT(flags_) \ + ((TCL_ENCODING_PROFILE_GET(flags_) == TCL_ENCODING_PROFILE_STRICT) \ + || (TCL_ENCODING_PROFILE_GET(flags_) == 0 \ + && TCL_ENCODING_PROFILE_DEFAULT == TCL_ENCODING_PROFILE_STRICT)) + +#define PROFILE_REPLACE(flags_) \ + ((TCL_ENCODING_PROFILE_GET(flags_) == TCL_ENCODING_PROFILE_REPLACE) \ + || (TCL_ENCODING_PROFILE_GET(flags_) == 0 \ + && TCL_ENCODING_PROFILE_DEFAULT == TCL_ENCODING_PROFILE_REPLACE)) + +#define UNICODE_REPLACE_CHAR ((Tcl_UniChar)0xFFFD) +#define SURROGATE(c_) (((c_) & ~0x7FF) == 0xD800) +#define HIGH_SURROGATE(c_) (((c_) & ~0x3FF) == 0xD800) +#define LOW_SURROGATE(c_) (((c_) & ~0x3FF) == 0xDC00) + +/* * The following variable is used in the sparse matrix code for a * TableEncoding to represent a page in the table that has no entries. */ @@ -230,6 +256,7 @@ static Tcl_EncodingConvertProc UtfToUtfProc; static Tcl_EncodingConvertProc Iso88591FromUtfProc; static Tcl_EncodingConvertProc Iso88591ToUtfProc; + /* * A Tcl_ObjType for holding a cached Tcl_Encoding in the twoPtrValue.ptr1 field * of the internalrep. This should help the lifetime of encodings be more useful. @@ -1114,7 +1141,8 @@ Tcl_ExternalToUtfDString( Tcl_DString *dstPtr) /* Uninitialized or free DString in which the * converted string is stored. */ { - Tcl_ExternalToUtfDStringEx(encoding, src, srcLen, TCL_ENCODING_NOCOMPLAIN, dstPtr); + Tcl_ExternalToUtfDStringEx( + NULL, encoding, src, srcLen, TCL_ENCODING_PROFILE_TCL8, dstPtr, NULL); return Tcl_DStringValue(dstPtr); } @@ -1128,34 +1156,55 @@ Tcl_ExternalToUtfDString( * The parameter flags controls the behavior, if any of the bytes in * the source buffer are invalid or cannot be represented in utf-8. * Possible flags values: - * TCL_ENCODING_NOCOMPLAIN: replace invalid characters/bytes by a default - * fallback character. Always return -1 (Default in Tcl 8.7). - * TCL_ENCODING_MODIFIED: convert NULL bytes to \xC0\x80 in stead of 0x00. - * Only valid for "utf-8" and "cesu-8". This flag may be used together - * with the other flags. + * target encoding. It should be composed by OR-ing the following: + * - *At most one* of TCL_ENCODING_PROFILE{DEFAULT,TCL8,STRICT} + * - TCL_ENCODING_STOPONERROR: Backward compatibility. Sets the profile + * to TCL_ENCODING_PROFILE_STRICT overriding any specified profile flags + * - TCL_ENCODING_MODIFIED: enable Tcl internal conversion mapping \xC0\x80 + * to 0x00. Only valid for "utf-8" and "cesu-8". + * Any other flag bits will cause an error to be returned (for future + * compatibility) * * Results: - * The converted bytes are stored in the DString, which is then NULL - * terminated in an encoding-specific manner. The return value is - * the error position in the source string or -1 if no conversion error - * is reported. - * + * The return value is one of + * TCL_OK: success. Converted string in *dstPtr + * TCL_ERROR: error in passed parameters. Error message in interp + * TCL_CONVERT_MULTIBYTE: source ends in truncated multibyte sequence + * TCL_CONVERT_SYNTAX: source is not conformant to encoding definition + * TCL_CONVERT_UNKNOWN: source contained a character that could not + * be represented in target encoding. + * * Side effects: - * None. + * + * TCL_OK: The converted bytes are stored in the DString and NUL + * terminated in an encoding-specific manner. + * TCL_ERROR: an error, message is stored in the interp if not NULL. + * TCL_CONVERT_*: if errorLocPtr is NULL, an error message is stored + * in the interpreter (if not NULL). If errorLocPtr is not NULL, + * no error message is stored as it is expected the caller is + * interested in whatever is decoded so far and not treating this + * as an error condition. + * + * In addition, *dstPtr is always initialized and must be cleared + * by the caller irrespective of the return code. * *------------------------------------------------------------------------- */ -Tcl_Size +int Tcl_ExternalToUtfDStringEx( + Tcl_Interp *interp, /* For error messages. May be NULL. */ Tcl_Encoding encoding, /* The encoding for the source string, or NULL * for the default system encoding. */ const char *src, /* Source string in specified encoding. */ Tcl_Size srcLen, /* Source string length in bytes, or < 0 for * encoding-specific string length. */ int flags, /* Conversion control flags. */ - Tcl_DString *dstPtr) /* Uninitialized or free DString in which the + Tcl_DString *dstPtr, /* Uninitialized or free DString in which the * converted string is stored. */ + Tcl_Size *errorLocPtr) /* Where to store the error location + (or TCL_INDEX_NONE if no error). May + be NULL. */ { char *dst; Tcl_EncodingState state; @@ -1164,7 +1213,18 @@ Tcl_ExternalToUtfDStringEx( Tcl_Size dstLen; const char *srcStart = src; + /* DO FIRST - Must always be initialized before returning */ Tcl_DStringInit(dstPtr); + + if (flags & (TCL_ENCODING_START|TCL_ENCODING_END)) { + /* TODO - what other flags are illegal? - See TIP 656 */ + Tcl_SetResult(interp, + "Parameter error: TCL_ENCODING_{START,STOP} bits set in flags.", + TCL_STATIC); + Tcl_SetErrorCode(interp, "TCL", "ENCODING", "ILLEGALFLAGS", NULL); + return TCL_ERROR; + } + dst = Tcl_DStringValue(dstPtr); dstLen = dstPtr->spaceAvl - 1; @@ -1179,6 +1239,7 @@ Tcl_ExternalToUtfDStringEx( srcLen = encodingPtr->lengthProc(src); } + flags = TclEncodingExternalFlagsToInternal(flags); flags |= TCL_ENCODING_START | TCL_ENCODING_END; if (encodingPtr->toUtfProc == UtfToUtfProc) { flags |= ENCODING_INPUT; @@ -1189,19 +1250,45 @@ Tcl_ExternalToUtfDStringEx( flags, &state, dst, dstLen, &srcRead, &dstWrote, &dstChars); soFar = dst + dstWrote - Tcl_DStringValue(dstPtr); - src += srcRead; - if (result != TCL_CONVERT_NOSPACE) { - Tcl_DStringSetLength(dstPtr, soFar); - return (result == TCL_OK) ? TCL_INDEX_NONE : (Tcl_Size)(src - srcStart); - } - flags &= ~TCL_ENCODING_START; - srcLen -= srcRead; - if (Tcl_DStringLength(dstPtr) == 0) { - Tcl_DStringSetLength(dstPtr, dstLen); - } - Tcl_DStringSetLength(dstPtr, 2 * Tcl_DStringLength(dstPtr) + 1); - dst = Tcl_DStringValue(dstPtr) + soFar; - dstLen = Tcl_DStringLength(dstPtr) - soFar - 1; + src += srcRead; + if (result != TCL_CONVERT_NOSPACE) { + Tcl_Size nBytesProcessed = (src - srcStart); + + Tcl_DStringSetLength(dstPtr, soFar); + if (errorLocPtr) { + /* + * Do not write error message into interpreter if caller + * wants to know error location. + */ + *errorLocPtr = result == TCL_OK ? TCL_INDEX_NONE : nBytesProcessed; + } + else { + /* Caller wants error message on failure */ + if (result != TCL_OK && interp != NULL) { + char buf[TCL_INTEGER_SPACE]; + sprintf(buf, "%" TCL_Z_MODIFIER "u", nBytesProcessed); + Tcl_SetObjResult( + interp, + Tcl_ObjPrintf("unexpected byte sequence starting at index %" + TCL_Z_MODIFIER "u: '\\x%X'", + nBytesProcessed, + UCHAR(srcStart[nBytesProcessed]))); + Tcl_SetErrorCode( + interp, "TCL", "ENCODING", "ILLEGALSEQUENCE", buf, NULL); + } + } + return result; + } + + /* Expand space and continue */ + flags &= ~TCL_ENCODING_START; + srcLen -= srcRead; + if (Tcl_DStringLength(dstPtr) == 0) { + Tcl_DStringSetLength(dstPtr, dstLen); + } + Tcl_DStringSetLength(dstPtr, 2 * Tcl_DStringLength(dstPtr) + 1); + dst = Tcl_DStringValue(dstPtr) + soFar; + dstLen = Tcl_DStringLength(dstPtr) - soFar - 1; } } @@ -1351,7 +1438,8 @@ Tcl_UtfToExternalDString( Tcl_DString *dstPtr) /* Uninitialized or free DString in which the * converted string is stored. */ { - Tcl_UtfToExternalDStringEx(encoding, src, srcLen, TCL_ENCODING_NOCOMPLAIN, dstPtr); + Tcl_UtfToExternalDStringEx( + NULL, encoding, src, srcLen, TCL_ENCODING_PROFILE_DEFAULT, dstPtr, NULL); return Tcl_DStringValue(dstPtr); } @@ -1364,36 +1452,53 @@ Tcl_UtfToExternalDString( * Convert a source buffer from UTF-8 to the specified encoding. * The parameter flags controls the behavior, if any of the bytes in * the source buffer are invalid or cannot be represented in the - * target encoding. - * Possible flags values: - * TCL_ENCODING_NOCOMPLAIN: replace invalid characters/bytes by a default - * fallback character. Always return -1 (Default in Tcl 8.7). - * TCL_ENCODING_MODIFIED: convert NULL bytes to \xC0\x80 in stead of 0x00. - * Only valid for "utf-8" and "cesu-8". This flag may be used together - * with the other flags. + * target encoding. It should be composed by OR-ing the following: + * - *At most one* of TCL_ENCODING_PROFILE{DEFAULT,TCL8,STRICT} + * - TCL_ENCODING_STOPONERROR: Backward compatibility. Sets the profile + * to TCL_ENCODING_PROFILE_STRICT overriding any specified profile flags + * - TCL_ENCODING_MODIFIED: convert NULL bytes to \xC0\x80 instead + * of 0x00. Only valid for "utf-8" and "cesu-8". * * Results: - * The converted bytes are stored in the DString, which is then NULL - * terminated in an encoding-specific manner. The return value is - * the error position in the source string or -1 if no conversion error - * is reported. + * The return value is one of + * TCL_OK: success. Converted string in *dstPtr + * TCL_ERROR: error in passed parameters. Error message in interp + * TCL_CONVERT_MULTIBYTE: source ends in truncated multibyte sequence + * TCL_CONVERT_SYNTAX: source is not conformant to encoding definition + * TCL_CONVERT_UNKNOWN: source contained a character that could not + * be represented in target encoding. * * Side effects: - * None. + * + * TCL_OK: The converted bytes are stored in the DString and NUL + * terminated in an encoding-specific manner + * TCL_ERROR: an error, message is stored in the interp if not NULL. + * TCL_CONVERT_*: if errorLocPtr is NULL, an error message is stored + * in the interpreter (if not NULL). If errorLocPtr is not NULL, + * no error message is stored as it is expected the caller is + * interested in whatever is decoded so far and not treating this + * as an error condition. + * + * In addition, *dstPtr is always initialized and must be cleared + * by the caller irrespective of the return code. * *------------------------------------------------------------------------- */ -Tcl_Size +int Tcl_UtfToExternalDStringEx( + Tcl_Interp *interp, /* For error messages. May be NULL. */ Tcl_Encoding encoding, /* The encoding for the converted string, or * NULL for the default system encoding. */ const char *src, /* Source string in UTF-8. */ Tcl_Size srcLen, /* Source string length in bytes, or < 0 for * strlen(). */ int flags, /* Conversion control flags. */ - Tcl_DString *dstPtr) /* Uninitialized or free DString in which the + Tcl_DString *dstPtr, /* Uninitialized or free DString in which the * converted string is stored. */ + Tcl_Size *errorLocPtr) /* Where to store the error location + (or TCL_INDEX_NONE if no error). May + be NULL. */ { char *dst; Tcl_EncodingState state; @@ -1402,7 +1507,18 @@ Tcl_UtfToExternalDStringEx( const char *srcStart = src; Tcl_Size dstLen; + /* DO FIRST - must always be initialized on return */ Tcl_DStringInit(dstPtr); + + if (flags & (TCL_ENCODING_START|TCL_ENCODING_END)) { + /* TODO - what other flags are illegal? - See TIP 656 */ + Tcl_SetResult(interp, + "Parameter error: TCL_ENCODING_{START,STOP} bits set in flags.", + TCL_STATIC); + Tcl_SetErrorCode(interp, "TCL", "ENCODING", "ILLEGALFLAGS", NULL); + return TCL_ERROR; + } + dst = Tcl_DStringValue(dstPtr); dstLen = dstPtr->spaceAvl - 1; @@ -1416,20 +1532,49 @@ Tcl_UtfToExternalDStringEx( } else if (srcLen == TCL_INDEX_NONE) { srcLen = strlen(src); } + + flags = TclEncodingExternalFlagsToInternal(flags); flags |= TCL_ENCODING_START | TCL_ENCODING_END; while (1) { result = encodingPtr->fromUtfProc(encodingPtr->clientData, src, - srcLen, flags, &state, dst, dstLen, - &srcRead, &dstWrote, &dstChars); + srcLen, flags, &state, dst, dstLen, + &srcRead, &dstWrote, &dstChars); soFar = dst + dstWrote - Tcl_DStringValue(dstPtr); src += srcRead; if (result != TCL_CONVERT_NOSPACE) { + Tcl_Size nBytesProcessed = (src - srcStart); int i = soFar + encodingPtr->nullSize - 1; while (i >= soFar) { Tcl_DStringSetLength(dstPtr, i--); } - return (result == TCL_OK) ? TCL_INDEX_NONE : (Tcl_Size)(src - srcStart); + if (errorLocPtr) { + /* + * Do not write error message into interpreter if caller + * wants to know error location. + */ + *errorLocPtr = result == TCL_OK ? TCL_INDEX_NONE : nBytesProcessed; + } + else { + /* Caller wants error message on failure */ + if (result != TCL_OK && interp != NULL) { + Tcl_Size pos = Tcl_NumUtfChars(srcStart, nBytesProcessed); + int ucs4; + char buf[TCL_INTEGER_SPACE]; + TclUtfToUCS4(&srcStart[nBytesProcessed], &ucs4); + sprintf(buf, "%" TCL_Z_MODIFIER "u", nBytesProcessed); + Tcl_SetObjResult( + interp, + Tcl_ObjPrintf( + "unexpected character at index %" TCL_Z_MODIFIER + "u: 'U+%06X'", + pos, + ucs4)); + Tcl_SetErrorCode(interp, "TCL", "ENCODING", "ILLEGALSEQUENCE", + buf, NULL); + } + } + return result; } flags &= ~TCL_ENCODING_START; @@ -2257,14 +2402,12 @@ BinaryProc( *------------------------------------------------------------------------- */ -#define STOPONERROR (!(flags & TCL_ENCODING_NOCOMPLAIN)) - static int UtfToUtfProc( void *clientData, /* additional flags, e.g. TCL_ENCODING_MODIFIED */ const char *src, /* Source string in UTF-8. */ int srcLen, /* Source string length in bytes. */ - int flags, /* Conversion control flags. */ + int flags, /* TCL_ENCODING_* conversion control flags. */ TCL_UNUSED(Tcl_EncodingState *), char *dst, /* Output buffer in which converted string is * stored. */ @@ -2286,6 +2429,7 @@ UtfToUtfProc( const char *dstStart, *dstEnd; int result, numChars, charLimit = INT_MAX; int ch; + int profile; result = TCL_OK; @@ -2303,7 +2447,9 @@ UtfToUtfProc( flags |= PTR2INT(clientData); dstEnd = dst + dstLen - ((flags & ENCODING_UTF) ? TCL_UTF_MAX : 6); + profile = TCL_ENCODING_PROFILE_GET(flags); for (numChars = 0; src < srcEnd && numChars <= charLimit; numChars++) { + if ((src > srcClose) && (!Tcl_UtfCharComplete(src, srcEnd - src))) { /* * If there is more string to follow, this will ensure that the @@ -2324,25 +2470,34 @@ UtfToUtfProc( */ *dst++ = *src++; - } else if ((UCHAR(*src) == 0xC0) && (src + 1 < srcEnd) - && (UCHAR(src[1]) == 0x80) && !(flags & TCL_ENCODING_MODIFIED) && (!(flags & ENCODING_INPUT) - || ((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT) - || (flags & ENCODING_FAILINDEX))) { - /* - * If in input mode, and -strict or -failindex is specified: This is an error. - */ - if ((STOPONERROR) && (flags & ENCODING_INPUT)) { - result = TCL_CONVERT_SYNTAX; - break; + } + else if ((UCHAR(*src) == 0xC0) && (src + 1 < srcEnd) && + (UCHAR(src[1]) == 0x80) && !(flags & TCL_ENCODING_MODIFIED) && + (!(flags & ENCODING_INPUT) || PROFILE_STRICT(profile) || + PROFILE_REPLACE(profile))) { + /* Special sequence \xC0\x80 */ + if ((PROFILE_STRICT(profile) || PROFILE_REPLACE(profile)) && (flags & ENCODING_INPUT)) { + if (PROFILE_REPLACE(profile)) { + dst += Tcl_UniCharToUtf(UNICODE_REPLACE_CHAR, dst); + src += 2; + } else { + /* PROFILE_STRICT */ + result = TCL_CONVERT_SYNTAX; + break; + } + } else { + /* + * Convert 0xC080 to real nulls when we are in output mode, + * irrespective of the profile. + */ + *dst++ = 0; + src += 2; } + } + else if (!Tcl_UtfCharComplete(src, srcEnd - src)) { /* - * Convert 0xC080 to real nulls when we are in output mode, with or without '-strict'. - */ - *dst++ = 0; - src += 2; - } else if (!Tcl_UtfCharComplete(src, srcEnd - src)) { - /* + * Incomplete byte sequence. * Always check before using TclUtfToUCS4. Not doing can so * cause it run beyond the end of the buffer! If we happen such an * incomplete char its bytes are made to represent themselves @@ -2350,32 +2505,45 @@ UtfToUtfProc( */ if (flags & ENCODING_INPUT) { - if ((STOPONERROR) && (flags & TCL_ENCODING_CHAR_LIMIT)) { - result = TCL_CONVERT_MULTIBYTE; + /* Incomplete bytes for modified UTF-8 target */ + if (PROFILE_STRICT(profile)) { + result = (flags & TCL_ENCODING_CHAR_LIMIT) + ? TCL_CONVERT_MULTIBYTE + : TCL_CONVERT_SYNTAX; break; } - if (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT) || (flags & ENCODING_FAILINDEX)) { - result = TCL_CONVERT_SYNTAX; - break; - } - } - char chbuf[2]; - chbuf[0] = UCHAR(*src++); chbuf[1] = 0; - TclUtfToUCS4(chbuf, &ch); + } + if (PROFILE_REPLACE(profile)) { + ch = UNICODE_REPLACE_CHAR; + ++src; + } else { + /* TCL_ENCODING_PROFILE_TCL8 */ + char chbuf[2]; + chbuf[0] = UCHAR(*src++); chbuf[1] = 0; + TclUtfToUCS4(chbuf, &ch); + } dst += Tcl_UniCharToUtf(ch, dst); - } else { + } + else { + int isInvalid = 0; size_t len = TclUtfToUCS4(src, &ch); if (flags & ENCODING_INPUT) { - if ((len < 2) && (ch != 0) - && (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT) || (flags & ENCODING_FAILINDEX))) { - goto utf8Syntax; - } else if ((ch > 0xFFFF) && !(flags & ENCODING_UTF) - && (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT) || (flags & ENCODING_FAILINDEX))) { - utf8Syntax: - result = TCL_CONVERT_SYNTAX; - break; + if ((len < 2) && (ch != 0)) { + isInvalid = 1; + } else if ((ch > 0xFFFF) && !(flags & ENCODING_UTF)) { + isInvalid = 1; + } + if (isInvalid) { + if (PROFILE_STRICT(profile)) { + result = TCL_CONVERT_SYNTAX; + break; + } + else if (PROFILE_REPLACE(profile)) { + ch = UNICODE_REPLACE_CHAR; + } } } + const char *saveSrc = src; src += len; if (!(flags & ENCODING_UTF) && !(flags & ENCODING_INPUT) && (ch > 0x3FF)) { @@ -2399,34 +2567,42 @@ UtfToUtfProc( /* * A surrogate character is detected, handle especially. */ - - if (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT) && (flags & ENCODING_UTF)) { + if (PROFILE_STRICT(profile) && (flags & ENCODING_UTF)) { result = TCL_CONVERT_UNKNOWN; src = saveSrc; break; } - int low = ch; - len = (src <= srcEnd-3) ? TclUtfToUCS4(src, &low) : 0; - - if (((low & ~0x3FF) != 0xDC00) || (ch & 0x400)) { - - if (STOPONERROR) { - result = TCL_CONVERT_UNKNOWN; - src = saveSrc; - break; + if (PROFILE_REPLACE(profile)) { + /* TODO - is this right for cesu8 or should we fall through below? */ + ch = UNICODE_REPLACE_CHAR; + } + else { + int low = ch; + len = (src <= srcEnd - 3) ? TclUtfToUCS4(src, &low) : 0; + + if ((!LOW_SURROGATE(low)) || (ch & 0x400)) { + + if (PROFILE_STRICT(profile)) { + result = TCL_CONVERT_UNKNOWN; + src = saveSrc; + break; + } + goto cesu8; } - goto cesu8; + src += len; + dst += Tcl_UniCharToUtf(ch, dst); + ch = low; } - src += len; - dst += Tcl_UniCharToUtf(ch, dst); - ch = low; #endif - } else if (STOPONERROR && !(flags & ENCODING_INPUT) && (((ch & ~0x7FF) == 0xD800))) { + } else if (PROFILE_STRICT(profile) && + (!(flags & ENCODING_INPUT)) && + SURROGATE(ch)) { result = TCL_CONVERT_UNKNOWN; src = saveSrc; break; - } else if (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT) - && (flags & ENCODING_INPUT) && ((ch & ~0x7FF) == 0xD800)) { + } else if (PROFILE_STRICT(profile) && + (flags & ENCODING_INPUT) && + SURROGATE(ch)) { result = TCL_CONVERT_SYNTAX; src = saveSrc; break; @@ -2494,8 +2670,8 @@ Utf32ToUtfProc( /* * Check alignment with utf-32 (4 == sizeof(UTF-32)) */ - if (bytesLeft != 0) { + /* We have a truncated code unit */ result = TCL_CONVERT_MULTIBYTE; srcLen -= bytesLeft; } @@ -2517,17 +2693,14 @@ Utf32ToUtfProc( } else { ch = (src[0] & 0xFF) << 24 | (src[1] & 0xFF) << 16 | (src[2] & 0xFF) << 8 | (src[3] & 0xFF); } - if ((unsigned)ch > 0x10FFFF) { - if (STOPONERROR) { + + if ((unsigned)ch > 0x10FFFF || SURROGATE(ch)) { + if (PROFILE_STRICT(flags)) { result = TCL_CONVERT_SYNTAX; break; } - ch = 0xFFFD; - } else if (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT) - && ((ch & ~0x7FF) == 0xD800)) { - if (STOPONERROR) { - result = TCL_CONVERT_SYNTAX; - break; + if (PROFILE_REPLACE(flags)) { + ch = UNICODE_REPLACE_CHAR; } } @@ -2541,25 +2714,31 @@ Utf32ToUtfProc( } else { dst += Tcl_UniCharToUtf(ch, dst); } - src += sizeof(unsigned int); + src += 4; } + + + /* + * If we had a truncated code unit at the end AND this is the last + * fragment AND profile is not "strict", stick FFFD in its place. + */ if ((flags & TCL_ENCODING_END) && (result == TCL_CONVERT_MULTIBYTE)) { - /* We have a single byte left-over at the end */ if (dst > dstEnd) { result = TCL_CONVERT_NOSPACE; } else { - /* destination is not full, so we really are at the end now */ - if ((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT) { - result = TCL_CONVERT_SYNTAX; - } else { - result = TCL_OK; - dst += Tcl_UniCharToUtf(0xFFFD, dst); - numChars++; - src += bytesLeft; - } - } + if (PROFILE_STRICT(flags)) { + result = TCL_CONVERT_SYNTAX; + } else { + /* PROFILE_REPLACE or PROFILE_TCL8 */ + result = TCL_OK; + dst += Tcl_UniCharToUtf(UNICODE_REPLACE_CHAR, dst); + numChars++; + src += bytesLeft; /* Go past truncated code unit */ + } + } } + *srcReadPtr = src - srcStart; *dstWrotePtr = dst - dstStart; *dstCharsPtr = numChars; @@ -2636,11 +2815,14 @@ UtfToUtf32Proc( break; } len = TclUtfToUCS4(src, &ch); - if ((ch & ~0x7FF) == 0xD800) { - if (STOPONERROR) { + if (SURROGATE(ch)) { + if (PROFILE_STRICT(flags)) { result = TCL_CONVERT_UNKNOWN; break; } + if (PROFILE_REPLACE(flags)) { + ch = UNICODE_REPLACE_CHAR; + } } src += len; if (flags & TCL_ENCODING_LE) { @@ -2772,22 +2954,27 @@ Utf16ToUtfProc( /* Bug [10c2c17c32]. If Hi surrogate, finish 3-byte UTF-8 */ dst += Tcl_UniCharToUtf(-1, dst); } + + /* + * If we had a truncated code unit at the end AND this is the last + * fragment AND profile is not "strict", stick FFFD in its place. + */ if ((flags & TCL_ENCODING_END) && (result == TCL_CONVERT_MULTIBYTE)) { - /* We have a single byte left-over at the end */ if (dst > dstEnd) { result = TCL_CONVERT_NOSPACE; } else { - /* destination is not full, so we really are at the end now */ - if (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT)) { - result = TCL_CONVERT_SYNTAX; - } else { - result = TCL_OK; - dst += Tcl_UniCharToUtf(0xFFFD, dst); - numChars++; - src++; - } - } + if (PROFILE_STRICT(flags)) { + result = TCL_CONVERT_SYNTAX; + } else { + /* PROFILE_REPLACE or PROFILE_TCL8 */ + result = TCL_OK; + dst += Tcl_UniCharToUtf(UNICODE_REPLACE_CHAR, dst); + numChars++; + src++; /* Go past truncated code unit */ + } + } } + *srcReadPtr = src - srcStart; *dstWrotePtr = dst - dstStart; *dstCharsPtr = numChars; @@ -2864,11 +3051,14 @@ UtfToUtf16Proc( break; } len = TclUtfToUCS4(src, &ch); - if ((ch & ~0x7FF) == 0xD800) { - if (STOPONERROR) { + if (SURROGATE(ch)) { + if (PROFILE_STRICT(flags)) { result = TCL_CONVERT_UNKNOWN; break; } + if (PROFILE_REPLACE(flags)) { + ch = UNICODE_REPLACE_CHAR; + } } src += len; if (flags & TCL_ENCODING_LE) { @@ -2971,25 +3161,25 @@ UtfToUcs2Proc( #if TCL_UTF_MAX < 4 len = TclUtfToUniChar(src, &ch); if ((ch >= 0xD800) && (len < 3)) { - if (STOPONERROR) { - result = TCL_CONVERT_UNKNOWN; - break; + if (PROFILE_STRICT(flags)) { + result = TCL_CONVERT_UNKNOWN; + break; } src += len; src += TclUtfToUniChar(src, &ch); - ch = 0xFFFD; + ch = UNICODE_REPLACE_CHAR; } #else len = TclUtfToUniChar(src, &ch); if (ch > 0xFFFF) { - if (STOPONERROR) { - result = TCL_CONVERT_UNKNOWN; - break; + if (PROFILE_STRICT(flags)) { + result = TCL_CONVERT_UNKNOWN; + break; } - ch = 0xFFFD; + ch = UNICODE_REPLACE_CHAR; } #endif - if (STOPONERROR && ((ch & ~0x7FF) == 0xD800)) { + if (PROFILE_STRICT(flags) && ((ch & ~0x7FF) == 0xD800)) { result = TCL_CONVERT_SYNTAX; break; } @@ -3087,24 +3277,35 @@ TableToUtfProc( if (prefixBytes[byte]) { src++; if (src >= srcEnd) { + /* + * TODO - this is broken. For consistency with other + * decoders, an error should be raised only if strict. + * However, doing that check cause a whole bunch of test + * failures. Need to verify if those tests are in fact + * correct. + */ src--; result = TCL_CONVERT_MULTIBYTE; break; } + ch = toUnicode[byte][*((unsigned char *)src)]; ch = toUnicode[byte][*((unsigned char *) src)]; } else { ch = pageZero[byte]; } if ((ch == 0) && (byte != 0)) { - if ((flags & ENCODING_FAILINDEX) - || ((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT)) { + if (PROFILE_STRICT(flags)) { result = TCL_CONVERT_SYNTAX; break; } if (prefixBytes[byte]) { src--; } - ch = (Tcl_UniChar) byte; + if (PROFILE_REPLACE(flags)) { + ch = UNICODE_REPLACE_CHAR; + } else { + ch = (Tcl_UniChar)byte; + } } /* @@ -3213,11 +3414,11 @@ TableFromUtfProc( word = fromUnicode[(ch >> 8)][ch & 0xFF]; if ((word == 0) && (ch != 0)) { - if (STOPONERROR) { + if (PROFILE_STRICT(flags)) { result = TCL_CONVERT_UNKNOWN; break; } - word = dataPtr->fallback; + word = dataPtr->fallback; /* Both profiles REPLACE and TCL8 */ } if (prefixBytes[(word >> 8)] != 0) { if (dst + 1 > dstEnd) { @@ -3401,7 +3602,7 @@ Iso88591FromUtfProc( || ((ch >= 0xD800) && (len < 3)) #endif ) { - if (STOPONERROR) { + if (PROFILE_STRICT(flags)) { result = TCL_CONVERT_UNKNOWN; break; } @@ -3414,7 +3615,7 @@ Iso88591FromUtfProc( * Plunge on, using '?' as a fallback character. */ - ch = (Tcl_UniChar) '?'; + ch = (Tcl_UniChar) '?'; /* Profiles TCL8 and REPLACE */ } if (dst > dstEnd) { @@ -3628,9 +3829,10 @@ EscapeToUtfProc( if ((checked == dataPtr->numSubTables + 2) || (flags & TCL_ENCODING_END)) { - if (!STOPONERROR) { + if (!PROFILE_STRICT(flags)) { /* - * Skip the unknown escape sequence. + * Skip the unknown escape sequence. TODO - bug? + * May be replace with UNICODE_REPLACE_CHAR? */ src += longest; @@ -3803,7 +4005,7 @@ EscapeFromUtfProc( if (word == 0) { state = oldState; - if (STOPONERROR) { + if (PROFILE_STRICT(flags)) { result = TCL_CONVERT_UNKNOWN; break; } @@ -4097,6 +4299,158 @@ InitializeEncodingSearchPath( } /* + *------------------------------------------------------------------------ + * + * TclEncodingProfileParseName -- + * + * Maps an encoding profile name to its integer equivalent. + * + * Results: + * TCL_OK on success or TCL_ERROR on failure. + * + * Side effects: + * Returns the profile enum value in *profilePtr + * + *------------------------------------------------------------------------ + */ +int +TclEncodingProfileNameToId( + Tcl_Interp *interp, /* For error messages. May be NULL */ + const char *profileName, /* Name of profile */ + int *profilePtr) /* Output */ +{ + size_t i; + + for (i = 0; i < sizeof(encodingProfiles) / sizeof(encodingProfiles[0]); ++i) { + if (!strcmp(profileName, encodingProfiles[i].name)) { + *profilePtr = encodingProfiles[i].value; + return TCL_OK; + } + } + if (interp) { + Tcl_SetObjResult( + interp, + Tcl_ObjPrintf( + "bad profile \"%s\". Must be \"tcl8\" or \"strict\".", + profileName)); + Tcl_SetErrorCode( + interp, "TCL", "ENCODING", "PROFILE", profileName, NULL); + } + return TCL_ERROR; +} + +/* + *------------------------------------------------------------------------ + * + * TclEncodingProfileValueToName -- + * + * Maps an encoding profile value to its name. + * + * Results: + * Pointer to the name or NULL on failure. Caller must not make + * not modify the string and must make a copy to hold on to it. + * + * Side effects: + * None. + *------------------------------------------------------------------------ + */ +const char * +TclEncodingProfileIdToName( + Tcl_Interp *interp, /* For error messages. May be NULL */ + int profileValue) /* Profile #define value */ +{ + size_t i; + + for (i = 0; i < sizeof(encodingProfiles) / sizeof(encodingProfiles[0]); ++i) { + if (profileValue == encodingProfiles[i].value) { + return encodingProfiles[i].name; + } + } + if (interp) { + Tcl_SetObjResult( + interp, + Tcl_ObjPrintf( + "Internal error. Bad profile id \"%d\".", + profileValue)); + Tcl_SetErrorCode( + interp, "TCL", "ENCODING", "PROFILEID", NULL); + } + return NULL; +} + +/* + *------------------------------------------------------------------------ + * + * TclEncodingExternalFlagsToInternal -- + * + * Maps the flags supported in the encoding C API's to internal flags. + * + * For backward compatibility reasons, TCL_ENCODING_STOPONERROR is + * is mapped to the TCL_ENCODING_PROFILE_STRICT overwriting any profile + * specified. + * + * If no profile or an invalid profile is specified, it is set to + * the default. + * + * Results: + * Internal encoding flag mask. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------ + */ +int TclEncodingExternalFlagsToInternal(int flags) +{ + if (flags & TCL_ENCODING_STOPONERROR) { + TCL_ENCODING_PROFILE_SET(flags, TCL_ENCODING_PROFILE_STRICT); + } + else { + int profile = TCL_ENCODING_PROFILE_GET(flags); + switch (profile) { + case TCL_ENCODING_PROFILE_TCL8: + case TCL_ENCODING_PROFILE_STRICT: + case TCL_ENCODING_PROFILE_REPLACE: + break; + case 0: /* Unspecified by caller */ + default: + TCL_ENCODING_PROFILE_SET(flags, TCL_ENCODING_PROFILE_DEFAULT); + break; + } + } + return flags; +} + +/* + *------------------------------------------------------------------------ + * + * TclGetEncodingProfiles -- + * + * Get the list of supported encoding profiles. + * + * Results: + * None. + * + * Side effects: + * The list of profile names is stored in the interpreter result. + * + *------------------------------------------------------------------------ + */ +void +TclGetEncodingProfiles(Tcl_Interp *interp) +{ + int i, n; + Tcl_Obj *objPtr; + n = sizeof(encodingProfiles) / sizeof(encodingProfiles[0]); + objPtr = Tcl_NewListObj(n, NULL); + for (i = 0; i < n; ++i) { + Tcl_ListObjAppendElement( + interp, objPtr, Tcl_NewStringObj(encodingProfiles[i].name, -1)); + } + Tcl_SetObjResult(interp, objPtr); +} + +/* * Local Variables: * mode: c * c-basic-offset: 4 diff --git a/generic/tclIO.c b/generic/tclIO.c index 26d0011..6d6a935 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -1675,8 +1675,12 @@ Tcl_CreateChannel( } statePtr->inputEncodingState = NULL; statePtr->inputEncodingFlags = TCL_ENCODING_START; + TCL_ENCODING_PROFILE_SET(statePtr->inputEncodingFlags, + TCL_ENCODING_PROFILE_DEFAULT); statePtr->outputEncodingState = NULL; statePtr->outputEncodingFlags = TCL_ENCODING_START; + TCL_ENCODING_PROFILE_SET(statePtr->outputEncodingFlags, + TCL_ENCODING_PROFILE_DEFAULT); /* * Set the channel up initially in AUTO input translation mode to accept @@ -4343,21 +4347,6 @@ Write( } /* - * Transfer encoding nocomplain/strict option to the encoding flags - */ - - if (GotFlag(statePtr, CHANNEL_ENCODING_NOCOMPLAIN)) { - statePtr->outputEncodingFlags |= TCL_ENCODING_NOCOMPLAIN; - } else { - statePtr->outputEncodingFlags &= ~TCL_ENCODING_NOCOMPLAIN; - } - if (GotFlag(statePtr, CHANNEL_ENCODING_STRICT)) { - statePtr->outputEncodingFlags |= TCL_ENCODING_STRICT; - } else { - statePtr->outputEncodingFlags &= ~TCL_ENCODING_STRICT; - } - - /* * Write the terminated escape sequence even if srcLen is 0. */ @@ -4681,21 +4670,6 @@ Tcl_GetsObj( } /* - * Transfer encoding nocomplain/strict option to the encoding flags - */ - - if (GotFlag(statePtr, CHANNEL_ENCODING_NOCOMPLAIN)) { - statePtr->inputEncodingFlags |= TCL_ENCODING_NOCOMPLAIN; - } else { - statePtr->inputEncodingFlags &= ~TCL_ENCODING_NOCOMPLAIN; - } - if (GotFlag(statePtr, CHANNEL_ENCODING_STRICT)) { - statePtr->inputEncodingFlags |= TCL_ENCODING_STRICT; - } else { - statePtr->inputEncodingFlags &= ~TCL_ENCODING_STRICT; - } - - /* * Object used by FilterInputBytes to keep track of how much data has been * consumed from the channel buffers. */ @@ -5458,21 +5432,6 @@ FilterInputBytes( } gsPtr->state = statePtr->inputEncodingState; - /* - * Transfer encoding nocomplain/strict option to the encoding flags - */ - - if (GotFlag(statePtr, CHANNEL_ENCODING_NOCOMPLAIN)) { - statePtr->inputEncodingFlags |= TCL_ENCODING_NOCOMPLAIN; - } else { - statePtr->inputEncodingFlags &= ~TCL_ENCODING_NOCOMPLAIN; - } - if (GotFlag(statePtr, CHANNEL_ENCODING_STRICT)) { - statePtr->inputEncodingFlags |= TCL_ENCODING_STRICT; - } else { - statePtr->inputEncodingFlags &= ~TCL_ENCODING_STRICT; - } - result = Tcl_ExternalToUtf(NULL, gsPtr->encoding, raw, rawLen, statePtr->inputEncodingFlags | TCL_ENCODING_NO_TERMINATE, &statePtr->inputEncodingState, dst, spaceLeft, &gsPtr->rawRead, @@ -6259,21 +6218,6 @@ ReadChars( } /* - * Transfer encoding nocomplain/strict option to the encoding flags - */ - - if (GotFlag(statePtr, CHANNEL_ENCODING_NOCOMPLAIN)) { - statePtr->inputEncodingFlags |= TCL_ENCODING_NOCOMPLAIN; - } else { - statePtr->inputEncodingFlags &= ~TCL_ENCODING_NOCOMPLAIN; - } - if (GotFlag(statePtr, CHANNEL_ENCODING_STRICT)) { - statePtr->inputEncodingFlags |= TCL_ENCODING_STRICT; - } else { - statePtr->inputEncodingFlags &= ~TCL_ENCODING_STRICT; - } - - /* * This routine is burdened with satisfying several constraints. It cannot * append more than 'charsToRead` chars onto objPtr. This is measured * after encoding and translation transformations are completed. There is @@ -7810,7 +7754,7 @@ Tcl_BadChannelOption( { if (interp != NULL) { const char *genericopt = - "blocking buffering buffersize encoding eofchar nocomplainencoding strictencoding translation"; + "blocking buffering buffersize encoding encodingprofile eofchar translation"; const char **argv; size_t argc, i; Tcl_DString ds; @@ -7951,7 +7895,7 @@ Tcl_GetChannelOption( return TCL_OK; } } - if (len == 0 || HaveOpt(2, "-encoding")) { + if (len == 0 || HaveOpt(8, "-encoding")) { if (len == 0) { Tcl_DStringAppendElement(dsPtr, "-encoding"); } @@ -7965,39 +7909,36 @@ Tcl_GetChannelOption( return TCL_OK; } } - if (len == 0 || HaveOpt(2, "-eofchar")) { - char buf[4] = ""; + if (len == 0 || HaveOpt(9, "-encodingprofile")) { + int profile; + const char *profileName; if (len == 0) { - Tcl_DStringAppendElement(dsPtr, "-eofchar"); + Tcl_DStringAppendElement(dsPtr, "-encodingprofile"); } - if ((flags & TCL_READABLE) && (statePtr->inEofChar != 0)) { - sprintf(buf, "%c", statePtr->inEofChar); + /* Note currently input and output profiles are same */ + profile = TCL_ENCODING_PROFILE_GET(statePtr->inputEncodingFlags); + profileName = TclEncodingProfileIdToName(interp, profile); + if (profileName == NULL) { + return TCL_ERROR; } + Tcl_DStringAppendElement(dsPtr, profileName); if (len > 0) { - Tcl_DStringAppend(dsPtr, buf, TCL_INDEX_NONE); return TCL_OK; } - Tcl_DStringAppendElement(dsPtr, buf); } - if (len == 0 || HaveOpt(1, "-nocomplainencoding")) { + if (len == 0 || HaveOpt(2, "-eofchar")) { + char buf[4] = ""; if (len == 0) { - Tcl_DStringAppendElement(dsPtr, "-nocomplainencoding"); - } - Tcl_DStringAppendElement(dsPtr, - (flags & CHANNEL_ENCODING_NOCOMPLAIN) ? "1" : "0"); - if (len > 0) { - return TCL_OK; + Tcl_DStringAppendElement(dsPtr, "-eofchar"); } - } - if (len == 0 || HaveOpt(1, "-strictencoding")) { - if (len == 0) { - Tcl_DStringAppendElement(dsPtr, "-strictencoding"); + if ((flags & TCL_READABLE) && (statePtr->inEofChar != 0)) { + sprintf(buf, "%c", statePtr->inEofChar); } - Tcl_DStringAppendElement(dsPtr, - (flags & CHANNEL_ENCODING_STRICT) ? "1" : "0"); if (len > 0) { + Tcl_DStringAppend(dsPtr, buf, TCL_INDEX_NONE); return TCL_OK; } + Tcl_DStringAppendElement(dsPtr, buf); } if (len == 0 || HaveOpt(1, "-translation")) { if (len == 0) { @@ -8180,6 +8121,7 @@ Tcl_SetChannelOption( return TCL_OK; } else if (HaveOpt(2, "-encoding")) { Tcl_Encoding encoding; + int profile; if ((newValue[0] == '\0') || (strcmp(newValue, "binary") == 0)) { encoding = NULL; @@ -8204,9 +8146,12 @@ Tcl_SetChannelOption( Tcl_FreeEncoding(statePtr->encoding); statePtr->encoding = encoding; statePtr->inputEncodingState = NULL; + profile = TCL_ENCODING_PROFILE_GET(statePtr->inputEncodingFlags); statePtr->inputEncodingFlags = TCL_ENCODING_START; + TCL_ENCODING_PROFILE_SET(statePtr->inputEncodingFlags, profile); statePtr->outputEncodingState = NULL; statePtr->outputEncodingFlags = TCL_ENCODING_START; + TCL_ENCODING_PROFILE_SET(statePtr->outputEncodingFlags, profile); /* Same as input */ ResetFlag(statePtr, CHANNEL_NEED_MORE_DATA|CHANNEL_ENCODING_ERROR); UpdateInterest(chanPtr); return TCL_OK; @@ -8244,30 +8189,13 @@ Tcl_SetChannelOption( ResetFlag(statePtr, CHANNEL_EOF|CHANNEL_STICKY_EOF|CHANNEL_BLOCKED); statePtr->inputEncodingFlags &= ~TCL_ENCODING_END; return TCL_OK; - } else if (HaveOpt(1, "-nocomplainencoding")) { - int newMode; - - if (Tcl_GetBoolean(interp, newValue, &newMode) == TCL_ERROR) { - return TCL_ERROR; - } - if (newMode) { - SetFlag(statePtr, CHANNEL_ENCODING_NOCOMPLAIN); - } else { - ResetFlag(statePtr, CHANNEL_ENCODING_NOCOMPLAIN); - } - ResetFlag(statePtr, CHANNEL_NEED_MORE_DATA|CHANNEL_ENCODING_ERROR); - return TCL_OK; - } else if (HaveOpt(1, "-strictencoding")) { - int newMode; - - if (Tcl_GetBoolean(interp, newValue, &newMode) == TCL_ERROR) { + } else if (HaveOpt(1, "-encodingprofile")) { + int profile; + if (TclEncodingProfileNameToId(interp, newValue, &profile) != TCL_OK) { return TCL_ERROR; } - if (newMode) { - SetFlag(statePtr, CHANNEL_ENCODING_STRICT); - } else { - ResetFlag(statePtr, CHANNEL_ENCODING_STRICT); - } + TCL_ENCODING_PROFILE_SET(statePtr->inputEncodingFlags, profile); + TCL_ENCODING_PROFILE_SET(statePtr->outputEncodingFlags, profile); ResetFlag(statePtr, CHANNEL_NEED_MORE_DATA|CHANNEL_ENCODING_ERROR); return TCL_OK; } else if (HaveOpt(1, "-translation")) { @@ -9344,12 +9272,17 @@ TclCopyChannel( * of the bytes themselves. */ + /* + * TODO - should really only allow lossless profiles. Below reflects + * Tcl 8.7 alphas prior to encoding profiles + */ + moveBytes = inStatePtr->inEofChar == '\0' /* No eofChar to stop input */ && inStatePtr->inputTranslation == TCL_TRANSLATE_LF && outStatePtr->outputTranslation == TCL_TRANSLATE_LF && inStatePtr->encoding == outStatePtr->encoding - && (inStatePtr->flags & TCL_ENCODING_STRICT) != TCL_ENCODING_STRICT - && outStatePtr->flags & TCL_ENCODING_NOCOMPLAIN; + && TCL_ENCODING_PROFILE_GET(inStatePtr->flags) != TCL_ENCODING_PROFILE_STRICT + && TCL_ENCODING_PROFILE_GET(outStatePtr->flags) == TCL_ENCODING_PROFILE_TCL8; /* * Allocate a new CopyState to maintain info about the current copy in @@ -9677,8 +9610,8 @@ CopyData( inBinary = (inStatePtr->encoding == NULL); outBinary = (outStatePtr->encoding == NULL); sameEncoding = inStatePtr->encoding == outStatePtr->encoding - && (inStatePtr->flags & TCL_ENCODING_STRICT) != TCL_ENCODING_STRICT - && outStatePtr->flags & TCL_ENCODING_NOCOMPLAIN; + && TCL_ENCODING_PROFILE_GET(inStatePtr->flags) != TCL_ENCODING_PROFILE_STRICT + && TCL_ENCODING_PROFILE_GET(outStatePtr->flags) == TCL_ENCODING_PROFILE_TCL8; if (!(inBinary || sameEncoding)) { TclNewObj(bufObj); diff --git a/generic/tclIO.h b/generic/tclIO.h index 62cf6e8..a050010 100644 --- a/generic/tclIO.h +++ b/generic/tclIO.h @@ -277,16 +277,11 @@ typedef struct ChannelState { * encountered an encoding error */ #define CHANNEL_RAW_MODE (1<<16) /* When set, notes that the Raw API is * being used. */ -#define CHANNEL_ENCODING_NOCOMPLAIN (1<<17) /* set if option - * -nocomplainencoding is set to 1 */ -#define CHANNEL_ENCODING_STRICT (1<<18) /* set if option - * -strictencoding is set to 1 */ #define CHANNEL_INCLOSE (1<<19) /* Channel is currently being closed. * Its structures are still live and * usable, but it may not be closed * again from within the close * handler. */ -#define ENCODING_FAILINDEX (1<<20) /* Internal flag, fail on Invalid bytes only */ #define CHANNEL_CLOSEDWRITE (1<<21) /* Channel write side has been closed. * No further Tcl-level write IO on * the channel is allowed. */ diff --git a/generic/tclInt.h b/generic/tclInt.h index 827fd6f..9a9c0ae 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -2875,7 +2875,19 @@ MODULE_SCOPE int tclFindExecutableSearchDone; MODULE_SCOPE char *tclMemDumpFileName; MODULE_SCOPE TclPlatformType tclPlatform; +/* + * Declarations related to internal encoding functions. + */ + MODULE_SCOPE Tcl_Encoding tclIdentityEncoding; +MODULE_SCOPE int +TclEncodingProfileNameToId(Tcl_Interp *interp, + const char *profileName, + int *profilePtr); +MODULE_SCOPE const char *TclEncodingProfileIdToName(Tcl_Interp *interp, + int profileId); +MODULE_SCOPE int TclEncodingExternalFlagsToInternal(int flags); +MODULE_SCOPE void TclGetEncodingProfiles(Tcl_Interp *interp); /* * TIP #233 (Virtualized Time) @@ -4748,6 +4760,8 @@ MODULE_SCOPE Tcl_LibraryInitProc TclThread_Init; MODULE_SCOPE Tcl_LibraryInitProc Procbodytest_Init; MODULE_SCOPE Tcl_LibraryInitProc Procbodytest_SafeInit; + + /* *---------------------------------------------------------------- * Macro used by the Tcl core to check whether a pattern has any characters diff --git a/generic/tclUtil.c b/generic/tclUtil.c index e96a564..3abd615 100644 --- a/generic/tclUtil.c +++ b/generic/tclUtil.c @@ -4026,10 +4026,11 @@ TclGetProcessGlobalValue( Tcl_MutexLock(&pgvPtr->mutex); epoch = ++pgvPtr->epoch; - Tcl_UtfToExternalDStringEx(pgvPtr->encoding, pgvPtr->value, - pgvPtr->numBytes, TCL_ENCODING_NOCOMPLAIN, &native); - Tcl_ExternalToUtfDStringEx(current, Tcl_DStringValue(&native), - Tcl_DStringLength(&native), TCL_ENCODING_NOCOMPLAIN, &newValue); + Tcl_UtfToExternalDStringEx(NULL, pgvPtr->encoding, pgvPtr->value, + pgvPtr->numBytes, TCL_ENCODING_PROFILE_TCL8, &native, NULL); + Tcl_ExternalToUtfDStringEx(NULL, current, Tcl_DStringValue(&native), + Tcl_DStringLength(&native), TCL_ENCODING_PROFILE_TCL8, + &newValue, NULL); Tcl_DStringFree(&native); Tcl_Free(pgvPtr->value); pgvPtr->value = (char *)Tcl_Alloc(Tcl_DStringLength(&newValue) + 1); diff --git a/generic/tclZlib.c b/generic/tclZlib.c index 5a6dbc4..dc7c3f3 100644 --- a/generic/tclZlib.c +++ b/generic/tclZlib.c @@ -547,8 +547,8 @@ ExtractHeader( } } - Tcl_ExternalToUtfDStringEx(latin1enc, (char *) headerPtr->comment, -1, - TCL_ENCODING_NOCOMPLAIN, &tmp); + Tcl_ExternalToUtfDStringEx(NULL, latin1enc, (char *) headerPtr->comment, -1, + TCL_ENCODING_PROFILE_TCL8, &tmp, NULL); SetValue(dictObj, "comment", Tcl_DStringToObj(&tmp)); } SetValue(dictObj, "crc", Tcl_NewBooleanObj(headerPtr->hcrc)); @@ -564,8 +564,8 @@ ExtractHeader( } } - Tcl_ExternalToUtfDStringEx(latin1enc, (char *) headerPtr->name, -1, - TCL_ENCODING_NOCOMPLAIN, &tmp); + Tcl_ExternalToUtfDStringEx(NULL, latin1enc, (char *) headerPtr->name, -1, + TCL_ENCODING_PROFILE_TCL8, &tmp, NULL); SetValue(dictObj, "filename", Tcl_DStringToObj(&tmp)); } if (headerPtr->os != 255) { diff --git a/unix/tclUnixChan.c b/unix/tclUnixChan.c index c41cdd9..b81676e 100644 --- a/unix/tclUnixChan.c +++ b/unix/tclUnixChan.c @@ -1028,11 +1028,11 @@ TtyGetOptionProc( tcgetattr(fsPtr->fileState.fd, &iostate); Tcl_DStringInit(&ds); - Tcl_ExternalToUtfDStringEx(NULL, (char *) &iostate.c_cc[VSTART], 1, TCL_ENCODING_NOCOMPLAIN, &ds); + Tcl_ExternalToUtfDStringEx(NULL, NULL, (char *) &iostate.c_cc[VSTART], 1, TCL_ENCODING_PROFILE_TCL8, &ds, NULL); Tcl_DStringAppendElement(dsPtr, Tcl_DStringValue(&ds)); TclDStringClear(&ds); - Tcl_ExternalToUtfDStringEx(NULL, (char *) &iostate.c_cc[VSTOP], 1, TCL_ENCODING_NOCOMPLAIN, &ds); + Tcl_ExternalToUtfDStringEx(NULL, NULL, (char *) &iostate.c_cc[VSTOP], 1, TCL_ENCODING_PROFILE_TCL8, &ds, NULL); Tcl_DStringAppendElement(dsPtr, Tcl_DStringValue(&ds)); Tcl_DStringFree(&ds); } diff --git a/unix/tclUnixFCmd.c b/unix/tclUnixFCmd.c index b205061..7753cec 100644 --- a/unix/tclUnixFCmd.c +++ b/unix/tclUnixFCmd.c @@ -762,16 +762,16 @@ TclpObjCopyDirectory( Tcl_Obj *transPtr; transPtr = Tcl_FSGetTranslatedPath(NULL,srcPathPtr); - Tcl_UtfToExternalDStringEx(NULL, + Tcl_UtfToExternalDStringEx(NULL, NULL, (transPtr != NULL ? TclGetString(transPtr) : NULL), - -1, TCL_ENCODING_NOCOMPLAIN, &srcString); + -1, TCL_ENCODING_PROFILE_TCL8, &srcString, NULL); if (transPtr != NULL) { Tcl_DecrRefCount(transPtr); } transPtr = Tcl_FSGetTranslatedPath(NULL,destPathPtr); - Tcl_UtfToExternalDStringEx(NULL, + Tcl_UtfToExternalDStringEx(NULL, NULL, (transPtr != NULL ? TclGetString(transPtr) : NULL), - -1, TCL_ENCODING_NOCOMPLAIN, &dstString); + -1, TCL_ENCODING_PROFILE_TCL8, &dstString, NULL); if (transPtr != NULL) { Tcl_DecrRefCount(transPtr); } @@ -826,9 +826,9 @@ TclpObjRemoveDirectory( int ret; Tcl_Obj *transPtr = Tcl_FSGetTranslatedPath(NULL, pathPtr); - Tcl_UtfToExternalDStringEx(NULL, + Tcl_UtfToExternalDStringEx(NULL, NULL, (transPtr != NULL ? TclGetString(transPtr) : NULL), - -1, TCL_ENCODING_NOCOMPLAIN, &pathString); + -1, TCL_ENCODING_PROFILE_TCL8, &pathString, NULL); if (transPtr != NULL) { Tcl_DecrRefCount(transPtr); } @@ -886,7 +886,7 @@ DoRemoveDirectory( result = TCL_OK; if ((errno != EEXIST) || (recursive == 0)) { if (errorPtr != NULL) { - Tcl_ExternalToUtfDStringEx(NULL, path, TCL_INDEX_NONE, TCL_ENCODING_NOCOMPLAIN, errorPtr); + Tcl_ExternalToUtfDStringEx(NULL, NULL, path, TCL_INDEX_NONE, TCL_ENCODING_PROFILE_TCL8, errorPtr, NULL); } result = TCL_ERROR; } @@ -1135,7 +1135,7 @@ TraverseUnixTree( end: if (errfile != NULL) { if (errorPtr != NULL) { - Tcl_ExternalToUtfDStringEx(NULL, errfile, TCL_INDEX_NONE, TCL_ENCODING_NOCOMPLAIN, errorPtr); + Tcl_ExternalToUtfDStringEx(NULL, NULL, errfile, TCL_INDEX_NONE, TCL_ENCODING_PROFILE_TCL8, errorPtr, NULL); } result = TCL_ERROR; } @@ -1205,8 +1205,8 @@ TraversalCopy( */ if (errorPtr != NULL) { - Tcl_ExternalToUtfDStringEx(NULL, Tcl_DStringValue(dstPtr), - Tcl_DStringLength(dstPtr), TCL_ENCODING_NOCOMPLAIN, errorPtr); + Tcl_ExternalToUtfDStringEx(NULL, NULL, Tcl_DStringValue(dstPtr), + Tcl_DStringLength(dstPtr), TCL_ENCODING_PROFILE_TCL8, errorPtr, NULL); } return TCL_ERROR; } @@ -1256,8 +1256,8 @@ TraversalDelete( break; } if (errorPtr != NULL) { - Tcl_ExternalToUtfDStringEx(NULL, Tcl_DStringValue(srcPtr), - Tcl_DStringLength(srcPtr), TCL_ENCODING_NOCOMPLAIN, errorPtr); + Tcl_ExternalToUtfDStringEx(NULL, NULL, Tcl_DStringValue(srcPtr), + Tcl_DStringLength(srcPtr), TCL_ENCODING_PROFILE_TCL8, errorPtr, NULL); } return TCL_ERROR; } @@ -1424,7 +1424,7 @@ GetOwnerAttribute( } else { Tcl_DString ds; - Tcl_ExternalToUtfDStringEx(NULL, pwPtr->pw_name, TCL_INDEX_NONE, TCL_ENCODING_NOCOMPLAIN, &ds); + Tcl_ExternalToUtfDStringEx(NULL, NULL, pwPtr->pw_name, TCL_INDEX_NONE, TCL_ENCODING_PROFILE_TCL8, &ds, NULL); *attributePtrPtr = Tcl_DStringToObj(&ds); } return TCL_OK; @@ -2086,7 +2086,7 @@ TclpObjNormalizePath( */ Tcl_DStringFree(&ds); - Tcl_ExternalToUtfDStringEx(NULL, normPath, newNormLen, TCL_ENCODING_NOCOMPLAIN, &ds); + Tcl_ExternalToUtfDStringEx(NULL, NULL, normPath, newNormLen, TCL_ENCODING_PROFILE_TCL8, &ds, NULL); if (path[nextCheckpoint] != '\0') { /* @@ -2179,7 +2179,7 @@ TclUnixOpenTemporaryFile( if (dirObj) { string = Tcl_GetStringFromObj(dirObj, &length); - Tcl_UtfToExternalDStringEx(NULL, string, length, TCL_ENCODING_NOCOMPLAIN, &templ); + Tcl_UtfToExternalDStringEx(NULL, NULL, string, length, TCL_ENCODING_PROFILE_TCL8, &templ, NULL); } else { Tcl_DStringInit(&templ); Tcl_DStringAppend(&templ, DefaultTempDir(), TCL_INDEX_NONE); /* INTL: native */ @@ -2189,7 +2189,7 @@ TclUnixOpenTemporaryFile( if (basenameObj) { string = Tcl_GetStringFromObj(basenameObj, &length); - Tcl_UtfToExternalDStringEx(NULL, string, length, TCL_ENCODING_NOCOMPLAIN, &tmp); + Tcl_UtfToExternalDStringEx(NULL, NULL, string, length, TCL_ENCODING_PROFILE_TCL8, &tmp, NULL); TclDStringAppendDString(&templ, &tmp); Tcl_DStringFree(&tmp); } else { @@ -2201,7 +2201,7 @@ TclUnixOpenTemporaryFile( #ifdef HAVE_MKSTEMPS if (extensionObj) { string = Tcl_GetStringFromObj(extensionObj, &length); - Tcl_UtfToExternalDStringEx(NULL, string, length, TCL_ENCODING_NOCOMPLAIN, &tmp); + Tcl_UtfToExternalDStringEx(NULL, NULL, string, length, TCL_ENCODING_PROFILE_TCL8, &tmp, NULL); TclDStringAppendDString(&templ, &tmp); fd = mkstemps(Tcl_DStringValue(&templ), Tcl_DStringLength(&tmp)); Tcl_DStringFree(&tmp); @@ -2217,8 +2217,8 @@ TclUnixOpenTemporaryFile( } if (resultingNameObj) { - Tcl_ExternalToUtfDStringEx(NULL, Tcl_DStringValue(&templ), - Tcl_DStringLength(&templ), TCL_ENCODING_NOCOMPLAIN, &tmp); + Tcl_ExternalToUtfDStringEx(NULL, NULL, Tcl_DStringValue(&templ), + Tcl_DStringLength(&templ), TCL_ENCODING_PROFILE_TCL8, &tmp, NULL); Tcl_SetStringObj(resultingNameObj, Tcl_DStringValue(&tmp), Tcl_DStringLength(&tmp)); Tcl_DStringFree(&tmp); @@ -2304,7 +2304,7 @@ TclpCreateTemporaryDirectory( if (dirObj) { string = TclGetString(dirObj); - Tcl_UtfToExternalDStringEx(NULL, string, dirObj->length, TCL_ENCODING_NOCOMPLAIN, &templ); + Tcl_UtfToExternalDStringEx(NULL, NULL, string, dirObj->length, TCL_ENCODING_PROFILE_TCL8, &templ, NULL); } else { Tcl_DStringInit(&templ); Tcl_DStringAppend(&templ, DefaultTempDir(), TCL_INDEX_NONE); /* INTL: native */ @@ -2317,7 +2317,7 @@ TclpCreateTemporaryDirectory( if (basenameObj) { string = TclGetString(basenameObj); if (basenameObj->length) { - Tcl_UtfToExternalDStringEx(NULL, string, basenameObj->length, TCL_ENCODING_NOCOMPLAIN, &tmp); + Tcl_UtfToExternalDStringEx(NULL, NULL, string, basenameObj->length, TCL_ENCODING_PROFILE_TCL8, &tmp, NULL); TclDStringAppendDString(&templ, &tmp); Tcl_DStringFree(&tmp); } else { @@ -2342,8 +2342,8 @@ TclpCreateTemporaryDirectory( * The template has been updated. Tell the caller what it was. */ - Tcl_ExternalToUtfDStringEx(NULL, Tcl_DStringValue(&templ), - Tcl_DStringLength(&templ), TCL_ENCODING_NOCOMPLAIN, &tmp); + Tcl_ExternalToUtfDStringEx(NULL, NULL, Tcl_DStringValue(&templ), + Tcl_DStringLength(&templ), TCL_ENCODING_PROFILE_TCL8, &tmp, NULL); Tcl_DStringFree(&templ); return Tcl_DStringToObj(&tmp); } diff --git a/unix/tclUnixFile.c b/unix/tclUnixFile.c index 830ed6f..fc297cb 100644 --- a/unix/tclUnixFile.c +++ b/unix/tclUnixFile.c @@ -153,7 +153,7 @@ TclpFindExecutable( #endif { encoding = Tcl_GetEncoding(NULL, NULL); - Tcl_ExternalToUtfDStringEx(encoding, name, TCL_INDEX_NONE, TCL_ENCODING_NOCOMPLAIN, &utfName); + Tcl_ExternalToUtfDStringEx(NULL, encoding, name, TCL_INDEX_NONE, TCL_ENCODING_PROFILE_TCL8, &utfName, NULL); TclSetObjNameOfExecutable( Tcl_NewStringObj(Tcl_DStringValue(&utfName), TCL_INDEX_NONE), encoding); Tcl_DStringFree(&utfName); @@ -179,8 +179,8 @@ TclpFindExecutable( Tcl_DStringAppend(&nameString, name, TCL_INDEX_NONE); Tcl_DStringFree(&buffer); - Tcl_UtfToExternalDStringEx(NULL, Tcl_DStringValue(&cwd), - Tcl_DStringLength(&cwd), TCL_ENCODING_NOCOMPLAIN, &buffer); + Tcl_UtfToExternalDStringEx(NULL, NULL, Tcl_DStringValue(&cwd), + Tcl_DStringLength(&cwd), TCL_ENCODING_PROFILE_TCL8, &buffer, NULL); if (Tcl_DStringValue(&cwd)[Tcl_DStringLength(&cwd) -1] != '/') { TclDStringAppendLiteral(&buffer, "/"); } @@ -189,8 +189,8 @@ TclpFindExecutable( Tcl_DStringFree(&nameString); encoding = Tcl_GetEncoding(NULL, NULL); - Tcl_ExternalToUtfDStringEx(encoding, Tcl_DStringValue(&buffer), TCL_INDEX_NONE, - TCL_ENCODING_NOCOMPLAIN, &utfName); + Tcl_ExternalToUtfDStringEx(NULL, encoding, Tcl_DStringValue(&buffer), TCL_INDEX_NONE, + TCL_ENCODING_PROFILE_TCL8, &utfName, NULL); TclSetObjNameOfExecutable( Tcl_NewStringObj(Tcl_DStringValue(&utfName), TCL_INDEX_NONE), encoding); Tcl_DStringFree(&utfName); @@ -825,7 +825,7 @@ TclpReadlink( return NULL; } - Tcl_ExternalToUtfDStringEx(NULL, link, length, TCL_ENCODING_NOCOMPLAIN, linkPtr); + Tcl_ExternalToUtfDStringEx(NULL, NULL, link, length, TCL_ENCODING_PROFILE_TCL8, linkPtr, NULL); return Tcl_DStringValue(linkPtr); #else return NULL; @@ -994,7 +994,7 @@ TclpObjLink( return NULL; } - Tcl_ExternalToUtfDStringEx(NULL, link, length, TCL_ENCODING_NOCOMPLAIN, &ds); + Tcl_ExternalToUtfDStringEx(NULL, NULL, link, length, TCL_ENCODING_PROFILE_TCL8, &ds, NULL); linkPtr = Tcl_DStringToObj(&ds); Tcl_IncrRefCount(linkPtr); return linkPtr; @@ -1059,7 +1059,7 @@ TclpNativeToNormalized( { Tcl_DString ds; - Tcl_ExternalToUtfDStringEx(NULL, (const char *) clientData, TCL_INDEX_NONE, TCL_ENCODING_NOCOMPLAIN, &ds); + Tcl_ExternalToUtfDStringEx(NULL, NULL, (const char *) clientData, TCL_INDEX_NONE, TCL_ENCODING_PROFILE_TCL8, &ds, NULL); return Tcl_DStringToObj(&ds); } @@ -1113,7 +1113,7 @@ TclNativeCreateNativeRep( } str = Tcl_GetStringFromObj(validPathPtr, &len); - Tcl_UtfToExternalDStringEx(NULL, str, len, TCL_ENCODING_NOCOMPLAIN, &ds); + Tcl_UtfToExternalDStringEx(NULL, NULL, str, len, TCL_ENCODING_PROFILE_TCL8, &ds, NULL); len = Tcl_DStringLength(&ds) + sizeof(char); if (strlen(Tcl_DStringValue(&ds)) < len - sizeof(char)) { /* See bug [3118489]: NUL in filenames */ diff --git a/unix/tclUnixInit.c b/unix/tclUnixInit.c index 8f7a737..71b059a 100644 --- a/unix/tclUnixInit.c +++ b/unix/tclUnixInit.c @@ -473,7 +473,7 @@ TclpInitLibraryPath( */ str = getenv("TCL_LIBRARY"); /* INTL: Native. */ - Tcl_ExternalToUtfDStringEx(NULL, str, TCL_INDEX_NONE, TCL_ENCODING_NOCOMPLAIN, &buffer); + Tcl_ExternalToUtfDStringEx(NULL, NULL, str, TCL_INDEX_NONE, TCL_ENCODING_PROFILE_TCL8, &buffer, NULL); str = Tcl_DStringValue(&buffer); if ((str != NULL) && (str[0] != '\0')) { diff --git a/win/tclWinSock.c b/win/tclWinSock.c index e5c7ee3..4eeeeec 100644 --- a/win/tclWinSock.c +++ b/win/tclWinSock.c @@ -373,8 +373,8 @@ InitializeHostName( Tcl_DStringSetLength(&inDs, 256); if (gethostname(Tcl_DStringValue(&inDs), Tcl_DStringLength(&inDs)) == 0) { - Tcl_ExternalToUtfDStringEx(NULL, Tcl_DStringValue(&inDs), - TCL_INDEX_NONE, TCL_ENCODING_NOCOMPLAIN, &ds); + Tcl_ExternalToUtfDStringEx(NULL, NULL, Tcl_DStringValue(&inDs), + TCL_INDEX_NONE, TCL_ENCODING_PROFILE_TCL8, &ds, NULL); } Tcl_DStringFree(&inDs); } -- cgit v0.12 From 4207c3d5c35d308fda68b5f6faf2bdd97e421d5a Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Sat, 25 Feb 2023 06:13:21 +0000 Subject: Tests pass modulo couple of differences in Tcl 8 and 9 default behavior to be discussed (tickets logged) --- tests/chanio.test | 6 +- tests/cmdAH.test | 661 +++++++++++++++++++++++++++++++++++--------------- tests/encoding.test | 186 +++++++------- tests/io.test | 52 ++-- tests/ioCmd.test | 16 +- tests/safe.test | 8 +- tests/socket.test | 2 +- tests/winConsole.test | 14 +- tests/zlib.test | 4 +- 9 files changed, 612 insertions(+), 337 deletions(-) diff --git a/tests/chanio.test b/tests/chanio.test index ec0dbbd..18e1614 100644 --- a/tests/chanio.test +++ b/tests/chanio.test @@ -252,7 +252,7 @@ test chan-io-3.3 {WriteChars: compatibility with WriteBytes: flush on line} -bod test chan-io-3.4 {WriteChars: loop over stage buffer} -body { # stage buffer maps to more than can be queued at once. set f [open $path(test1) w] - chan configure $f -encoding jis0208 -buffersize 16 -nocomplainencoding 1 + chan configure $f -encoding jis0208 -buffersize 16 -encodingprofile tcl8 chan puts -nonewline $f "\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\" set x [list [contents $path(test1)]] chan close $f @@ -265,7 +265,7 @@ test chan-io-3.5 {WriteChars: saved != 0} -body { # be moved to beginning of next channel buffer to preserve requested # buffersize. set f [open $path(test1) w] - chan configure $f -encoding jis0208 -buffersize 17 -nocomplainencoding 1 + chan configure $f -encoding jis0208 -buffersize 17 -encodingprofile tcl8 chan puts -nonewline $f "\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\" set x [list [contents $path(test1)]] chan close $f @@ -298,7 +298,7 @@ test chan-io-3.7 {WriteChars: (bufPtr->nextAdded > bufPtr->length)} -body { # on flush. The truncated bytes are moved to the beginning of the next # channel buffer. set f [open $path(test1) w] - chan configure $f -encoding jis0208 -buffersize 17 -nocomplainencoding 1 + chan configure $f -encoding jis0208 -buffersize 17 -encodingprofile tcl8 chan puts -nonewline $f "\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\" set x [list [contents $path(test1)]] chan close $f diff --git a/tests/cmdAH.test b/tests/cmdAH.test index 8805906..634c3c4 100644 --- a/tests/cmdAH.test +++ b/tests/cmdAH.test @@ -178,238 +178,513 @@ test cmdAH-3.2 {Tcl_ContinueObjCmd, success} { list [catch {continue} msg] $msg } {4 {}} -test cmdAH-4.1 {Tcl_EncodingObjCmd} -returnCodes error -body { +### +# encoding command + +set "numargErrors(encoding system)" {^wrong # args: should be "(encoding |::tcl::encoding::)system \?encoding\?"$} +set "numargErrors(encoding convertfrom)" {wrong # args: should be "(encoding |::tcl::encoding::)convertfrom \?-profile profile\? \?-failindex var\? encoding data" or "(encoding |::tcl::encoding::)convertfrom data"} +set "numargErrors(encoding convertto)" {wrong # args: should be "(encoding |::tcl::encoding::)convertto \?-profile profile\? \?-failindex var\? encoding data" or "(encoding |::tcl::encoding::)convertto data"} +set "numargErrors(encoding names)" {wrong # args: should be "encoding names"} +set "numargErrors(encoding profiles)" {wrong # args: should be "encoding profiles"} + +source [file join [file dirname [info script]] encodingVectors.tcl] + + +# Maps utf-{16,32}{le,be} to utf-16, utf-32 and +# others to "". Used to test utf-16, utf-32 based +# on system endianness +proc endianUtf {enc} { + if {$::tcl_platform(byteOrder) eq "littleEndian"} { + set endian le + } else { + set endian be + } + if {$enc eq "utf-16$endian" || $enc eq "utf-32$endian"} { + return [string range $enc 0 5] + } + return "" +} + +# Map arbitrary strings to printable form in ASCII. +proc printable {s} { + set print "" + foreach c [split $s ""] { + set i [scan $c %c] + if {[string is print $c] && ($i <= 127)} { + append print $c + } elseif {$i <= 0xff} { + append print \\x[format %02X $i] + } elseif {$i <= 0xffff} { + append print \\u[format %04X $i] + } else { + append print \\U[format %08X $i] + } + } + return $print +} + +# +# Check errors for invalid number of arguments +proc badnumargs {id cmd cmdargs} { + variable numargErrors + test $id.a "Syntax error: $cmd $cmdargs" \ + -body [list {*}$cmd {*}$cmdargs] \ + -result $numargErrors($cmd) \ + -match regexp \ + -returnCodes error + test $id.b "Syntax error: $cmd (byte compiled)" \ + -setup [list proc compiled_proc {} [list {*}$cmd {*}$cmdargs]] \ + -body {compiled_proc} \ + -cleanup {rename compiled_proc {}} \ + -result $numargErrors($cmd) \ + -match regexp \ + -returnCodes error +} + +# Wraps tests resulting in unknown encoding errors +proc unknownencodingtest {id cmd} { + set result "unknown encoding \"[lindex $cmd end-1]\"" + test $id.a "Unknown encoding error: $cmd" \ + -body [list encoding {*}$cmd] \ + -result $result \ + -returnCodes error + test $id.b "Syntax error: $cmd (byte compiled)" \ + -setup [list proc encoding_test {} [list encoding {*}$cmd]] \ + -body {encoding_test} \ + -cleanup {rename encoding_test {}} \ + -result $result \ + -returnCodes error +} + +# Wraps tests for conversion, successful or not. +# Really more general than just for encoding conversion. +proc testconvert {id body result args} { + test $id.a $body \ + -body $body \ + -result $result \ + {*}$args + dict append args -setup \n[list proc compiled_script {} $body] + dict append args -cleanup "\nrename compiled_script {}" + test $id.b "$body (byte compiled)" \ + -body {compiled_script} \ + -result $result \ + {*}$args +} + +# Wrapper to verify encoding convert{to,from} ?-profile? +# Generates tests for compiled and uncompiled implementation. +# Also generates utf-{16,32} tests if passed encoding is utf-{16,32}{le,be} +# The enc and profile are appended to id to generate the test id +proc testprofile {id converter enc profile data result args} { + testconvert $id.$enc.$profile [list encoding $converter -profile $profile $enc $data] $result {*}$args + if {[set enc2 [endianUtf $enc]] ne ""} { + # If utf{16,32}-{le,be}, also do utf{16,32} + testconvert $id.$enc2.$profile [list encoding $converter -profile $profile $enc2 $data] $result {*}$args + } + + # If this is the default profile, generate a test without specifying profile + if {$profile eq $::encDefaultProfile} { + testconvert $id.$enc.default [list encoding $converter $enc $data] $result {*}$args + if {[set enc2 [endianUtf $enc]] ne ""} { + # If utf{16,32}-{le,be}, also do utf{16,32} + testconvert $id.$enc2.default [list encoding $converter $enc2 $data] $result {*}$args + } + } +} + + +# Wrapper to verify encoding convert{to,from} -failindex ?-profile? +# Generates tests for compiled and uncompiled implementation. +# Also generates utf-{16,32} tests if passed encoding is utf-{16,32}{le,be} +# The enc and profile are appended to id to generate the test id +proc testfailindex {id converter enc data result failidx {profile default}} { + testconvert $id.$enc.$profile "list \[encoding $converter -profile $profile -failindex idx $enc [list $data]\] \[set idx\]" [list $result $failidx] + if {[set enc2 [endianUtf $enc]] ne ""} { + # If utf{16,32}-{le,be}, also do utf{16,32} + testconvert $id.$enc2.$profile "list \[encoding $converter -profile $profile -failindex idx $enc2 [list $data]\] \[set idx]" [list $result $failidx] + } + + # If this is the default profile, generate a test without specifying profile + if {$profile eq $::encDefaultProfile} { + testconvert $id.$enc.default "list \[encoding $converter -failindex idx $enc [list $data]\] \[set idx]" [list $result $failidx] + if {[set enc2 [endianUtf $enc]] ne ""} { + # If utf{16,32}-{le,be}, also do utf{16,32} + testconvert $id.$enc2.default "list \[encoding $converter -failindex idx $enc2 [list $data]\] \[set idx]" [list $result $failidx] + } + } +} + +test cmdAH-4.1.1 {encoding} -returnCodes error -body { encoding } -result {wrong # args: should be "encoding subcommand ?arg ...?"} -test cmdAH-4.2 {Tcl_EncodingObjCmd} -returnCodes error -body { +test cmdAH-4.1.2 {Tcl_EncodingObjCmd} -returnCodes error -body { encoding foo -} -result {unknown or ambiguous subcommand "foo": must be convertfrom, convertto, dirs, names, or system} -test cmdAH-4.3 {Tcl_EncodingObjCmd} -returnCodes error -body { - encoding convertto -} -result {wrong # args: should be "encoding convertto ?-strict? ?-failindex var? ?encoding? data" or "encoding convertto -nocomplain ?encoding? data"} -test cmdAH-4.4 {Tcl_EncodingObjCmd} -returnCodes error -body { - encoding convertto foo bar -} -result {unknown encoding "foo"} -test cmdAH-4.5 {Tcl_EncodingObjCmd} -setup { +} -result {unknown or ambiguous subcommand "foo": must be convertfrom, convertto, dirs, names, profiles, or system} + +# +# encoding system 4.2.* +badnumargs cmdAH-4.2.1 {encoding system} {ascii ascii} +test cmdAH-4.2.2 {Tcl_EncodingObjCmd} -setup { set system [encoding system] } -body { - encoding system jis0208 - encoding convertto 乎 + encoding system iso8859-1 + encoding system } -cleanup { encoding system $system -} -result 8C -test cmdAH-4.6 {Tcl_EncodingObjCmd} -setup { +} -result iso8859-1 + +# +# encoding convertfrom 4.3.* + +# Odd number of args is always invalid since last two args +# are ENCODING DATA and all options take a value +badnumargs cmdAH-4.3.1 {encoding convertfrom} {} +badnumargs cmdAH-4.3.2 {encoding convertfrom} {-failindex VAR ABC} +badnumargs cmdAH-4.3.3 {encoding convertfrom} {-profile VAR ABC} +badnumargs cmdAH-4.3.4 {encoding convertfrom} {-failindex VAR -profile strict ABC} +badnumargs cmdAH-4.3.5 {encoding convertfrom} {-profile strict -failindex VAR ABC} + +# Test that last two args always treated as ENCODING DATA +unknownencodingtest cmdAH-4.3.6 {convertfrom -failindex ABC} +unknownencodingtest cmdAH-4.3.7 {convertfrom -profile ABC} +unknownencodingtest cmdAH-4.3.8 {convertfrom nosuchencoding ABC} +unknownencodingtest cmdAH-4.3.9 {convertfrom -failindex VAR -profile ABC} +unknownencodingtest cmdAH-4.3.10 {convertfrom -profile strict -failindex ABC} +testconvert cmdAH-4.3.11 { + encoding convertfrom jis0208 \x38\x43 +} \u4e4e -setup { set system [encoding system] -} -body { encoding system iso8859-1 - encoding convertto jis0208 乎 } -cleanup { encoding system $system -} -result 8C -test cmdAH-4.7 {Tcl_EncodingObjCmd} -returnCodes error -body { - encoding convertfrom -} -result {wrong # args: should be "encoding convertfrom ?-strict? ?-failindex var? ?encoding? data" or "encoding convertfrom -nocomplain ?encoding? data"} -test cmdAH-4.8 {Tcl_EncodingObjCmd} -returnCodes error -body { - encoding convertfrom foo bar -} -result {unknown encoding "foo"} -test cmdAH-4.9 {Tcl_EncodingObjCmd} -setup { +} + +# Verify single arg defaults to system encoding +testconvert cmdAH-4.3.12 { + encoding convertfrom \x38\x43 +} \u4e4e -setup { set system [encoding system] -} -body { encoding system jis0208 - encoding convertfrom 8C } -cleanup { encoding system $system -} -result 乎 -test cmdAH-4.10 {Tcl_EncodingObjCmd} -setup { +} + +# convertfrom ?-profile? : valid byte sequences +foreach {enc str hex ctrl comment} $encValidStrings { + if {"knownBug" in $ctrl} continue + set bytes [binary decode hex $hex] + set prefix A + set suffix B + set prefix_bytes [encoding convertto $enc A] + set suffix_bytes [encoding convertto $enc B] + foreach profile $encProfiles { + testprofile cmdAH-4.3.13.$hex.solo convertfrom $enc $profile $bytes $str + testprofile cmdAH-4.3.13.$hex.lead convertfrom $enc $profile $bytes$suffix_bytes $str$suffix + testprofile cmdAH-4.3.13.$hex.tail convertfrom $enc $profile $prefix_bytes$bytes $prefix$str + testprofile cmdAH-4.3.13.$hex.middle convertfrom $enc $profile $prefix_bytes$bytes$suffix_bytes $prefix$str$suffix + } +} + +# convertfrom ?-profile? : invalid byte sequences +foreach {enc hex profile str failidx ctrl comment} $encInvalidBytes { + if {"knownBug" in $ctrl} continue + set bytes [binary format H* $hex] + set prefix A + set suffix B + set prefix_bytes [encoding convertto $enc $prefix] + set suffix_bytes [encoding convertto $enc $suffix] + set prefixLen [string length $prefix_bytes] + set result [list $str] + # TODO - if the bad byte is unprintable, tcltest errors out when printing a mismatch + # so glob it out in error message pattern for now. + set errorWithoutPrefix [list "unexpected byte sequence starting at index $failidx: *" -returnCodes error -match glob] + set errorWithPrefix [list "unexpected byte sequence starting at index [expr {$failidx+$prefixLen}]: *" -returnCodes error -match glob] + if {$ctrl eq {} || "solo" in $ctrl} { + if {$failidx == -1} { + set result [list $str] + } else { + set result $errorWithoutPrefix + } + testprofile cmdAH-4.3.13.$hex.solo convertfrom $enc $profile $bytes {*}$result + } + if {$ctrl eq {} || "lead" in $ctrl} { + if {$failidx == -1} { + set result [list $str$suffix] + } else { + set result $errorWithoutPrefix + } + testprofile cmdAH-4.3.13.$hex.lead convertfrom $enc $profile $bytes$suffix_bytes {*}$result + } + if {$ctrl eq {} || "tail" in $ctrl} { + if {$failidx == -1} { + set result [list $prefix$str] + } else { + set result $errorWithPrefix + } + testprofile cmdAH-4.3.13.$hex.tail convertfrom $enc $profile $prefix_bytes$bytes {*}$result + } + if {$ctrl eq {} || "middle" in $ctrl} { + if {$failidx == -1} { + set result [list $prefix$str$suffix] + } else { + set result $errorWithPrefix + } + testprofile cmdAH-4.3.13.$hex.middle convertfrom $enc $profile $prefix_bytes$bytes$suffix_bytes {*}$result + } +} + +# convertfrom -failindex ?-profile? - valid data +foreach {enc str hex ctrl comment} $encValidStrings { + if {"knownBug" in $ctrl} continue + set bytes [binary decode hex $hex] + set prefix A + set suffix B + set prefix_bytes [encoding convertto $enc $prefix] + set suffix_bytes [encoding convertto $enc $suffix] + foreach profile $encProfiles { + testfailindex cmdAH-4.3.14.$hex.solo convertfrom $enc $bytes $str -1 $profile + testfailindex cmdAH-4.3.14.$hex.lead convertfrom $enc $bytes$suffix_bytes $str$suffix -1 $profile + testfailindex cmdAH-4.3.14.$hex.tail convertfrom $enc $prefix_bytes$bytes $prefix$str -1 $profile + testfailindex cmdAH-4.3.14.$hex.middle convertfrom $enc $prefix_bytes$bytes$suffix_bytes $prefix$str$suffix -1 $profile + } +} + +# convertfrom -failindex ?-profile? - invalid data +foreach {enc hex profile str failidx ctrl comment} $encInvalidBytes { + if {"knownBug" in $ctrl} continue + # There are multiple test cases based on location of invalid bytes + set bytes [binary decode hex $hex] + set prefix A + set suffix B + set prefix_bytes [encoding convertto $enc $prefix] + set suffix_bytes [encoding convertto $enc $suffix] + set prefixLen [string length $prefix_bytes] + if {$ctrl eq {} || "solo" in $ctrl} { + testfailindex cmdAH-4.3.14.$hex.solo convertfrom $enc $bytes $str $failidx $profile + } + if {$ctrl eq {} || "lead" in $ctrl} { + if {$failidx == -1} { + # If success expected + set result $str$suffix + } else { + # Failure expected + set result "" + } + testfailindex cmdAH-4.3.14.$hex.lead convertfrom $enc $bytes$suffix_bytes $result $failidx $profile + } + if {$ctrl eq {} || "tail" in $ctrl} { + set expected_failidx $failidx + if {$failidx == -1} { + # If success expected + set result $prefix$str + } else { + # Failure expected + set result $prefix + incr expected_failidx $prefixLen + } + testfailindex cmdAH-4.3.14.$hex.tail convertfrom $enc $prefix_bytes$bytes $result $expected_failidx $profile + } + if {$ctrl eq {} || "middle" in $ctrl} { + set expected_failidx $failidx + if {$failidx == -1} { + # If success expected + set result $prefix$str$suffix + } else { + # Failure expected + set result $prefix + incr expected_failidx $prefixLen + } + testfailindex cmdAH-4.3.14.$hex.middle convertfrom $enc $prefix_bytes$bytes$suffix_bytes $result $expected_failidx $profile + } +} + +# +# encoding convertto 4.4.* + +badnumargs cmdAH-4.4.1 {encoding convertto} {} +badnumargs cmdAH-4.4.2 {encoding convertto} {-failindex VAR ABC} +badnumargs cmdAH-4.4.3 {encoding convertto} {-profile VAR ABC} +badnumargs cmdAH-4.4.4 {encoding convertto} {-failindex VAR -profile strict ABC} +badnumargs cmdAH-4.4.5 {encoding convertto} {-profile strict -failindex VAR ABC} + +# Test that last two args always treated as ENCODING DATA +unknownencodingtest cmdAH-4.4.6 {convertto -failindex ABC} +unknownencodingtest cmdAH-4.4.7 {convertto -profile ABC} +unknownencodingtest cmdAH-4.4.8 {convertto nosuchencoding ABC} +unknownencodingtest cmdAH-4.4.9 {convertto -failindex VAR -profile ABC} +unknownencodingtest cmdAH-4.4.10 {convertto -profile strict -failindex ABC} +testconvert cmdAH-4.4.11 { + encoding convertto jis0208 \u4e4e +} \x38\x43 -setup { set system [encoding system] -} -body { encoding system iso8859-1 - encoding convertfrom jis0208 8C } -cleanup { encoding system $system -} -result 乎 -test cmdAH-4.11 {Tcl_EncodingObjCmd} -returnCodes error -body { - encoding names foo -} -result {wrong # args: should be "encoding names"} -test cmdAH-4.12 {Tcl_EncodingObjCmd} -returnCodes error -body { - encoding system foo bar -} -result {wrong # args: should be "encoding system ?encoding?"} -test cmdAH-4.13 {Tcl_EncodingObjCmd} -setup { +} + +# Verify single arg defaults to system encoding +testconvert cmdAH-4.4.12 { + encoding convertto \u4e4e +} \x38\x43 -setup { set system [encoding system] -} -body { - encoding system iso8859-1 - encoding system + encoding system jis0208 } -cleanup { encoding system $system -} -result iso8859-1 +} + +# convertto ?-profile? : valid byte sequences -test cmdAH-4.14.1 {Syntax error, -nocomplain and -failindex, no encoding} -body { - encoding convertfrom -nocomplain -failindex 2 ABC -} -returnCodes 1 -result {wrong # args: should be "encoding convertfrom ?-strict? ?-failindex var? ?encoding? data" or "encoding convertfrom -nocomplain ?encoding? data"} -test cmdAH-4.14.2 {Syntax error, -nocomplain and -failindex, no encoding} -body { - encoding convertto -nocomplain -failindex 2 ABC -} -returnCodes 1 -result {wrong # args: should be "encoding convertto ?-strict? ?-failindex var? ?encoding? data" or "encoding convertto -nocomplain ?encoding? data"} -test cmdAH-4.15.1 {Syntax error, -failindex and -nocomplain, no encoding} -body { - encoding convertfrom -failindex 2 -nocomplain ABC -} -returnCodes 1 -result {unknown encoding "-nocomplain"} -test cmdAH-4.15.2 {Syntax error, -failindex and -nocomplain, no encoding} -body { - encoding convertto -failindex 2 -nocomplain ABC -} -returnCodes 1 -result {unknown encoding "-nocomplain"} -test cmdAH-4.16.1 {Syntax error, -nocomplain and -failindex, encoding} -body { - encoding convertfrom -nocomplain -failindex 2 utf-8 ABC -} -returnCodes 1 -result {wrong # args: should be "encoding convertfrom ?-strict? ?-failindex var? ?encoding? data" or "encoding convertfrom -nocomplain ?encoding? data"} -test cmdAH-4.16.2 {Syntax error, -nocomplain and -failindex, encoding} -body { - encoding convertto -nocomplain -failindex 2 utf-8 ABC -} -returnCodes 1 -result {wrong # args: should be "encoding convertto ?-strict? ?-failindex var? ?encoding? data" or "encoding convertto -nocomplain ?encoding? data"} -test cmdAH-4.17.1 {Syntax error, -failindex and -nocomplain, encoding} -body { - encoding convertfrom -failindex 2 -nocomplain utf-8 ABC -} -returnCodes 1 -result {wrong # args: should be "encoding convertfrom ?-strict? ?-failindex var? ?encoding? data" or "encoding convertfrom -nocomplain ?encoding? data"} -test cmdAH-4.17.2 {Syntax error, -failindex and -nocomplain, encoding} -body { - encoding convertto -failindex 2 -nocomplain utf-8 ABC -} -returnCodes 1 -result {wrong # args: should be "encoding convertto ?-strict? ?-failindex var? ?encoding? data" or "encoding convertto -nocomplain ?encoding? data"} -test cmdAH-4.18.1 {Syntax error, -failindex with no var, no encoding} -body { - encoding convertfrom -failindex ABC -} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertfrom ?-strict? ?-failindex var? ?encoding? data" or "::tcl::encoding::convertfrom -nocomplain ?encoding? data"} -test cmdAH-4.18.2 {Syntax error, -failindex with no var, no encoding (byte compiled)} -setup { - proc encoding_test {} { - encoding convertfrom -failindex ABC +foreach {enc str hex ctrl comment} $encValidStrings { + if {"knownBug" in $ctrl} continue + set bytes [binary decode hex $hex] + set printable [printable $str] + set prefix A + set suffix B + set prefix_bytes [encoding convertto $enc A] + set suffix_bytes [encoding convertto $enc B] + foreach profile $encProfiles { + testprofile cmdAH-4.4.13.$printable.solo convertto $enc $profile $str $bytes + testprofile cmdAH-4.4.13.$printable.lead convertto $enc $profile $str$suffix $bytes$suffix_bytes + testprofile cmdAH-4.4.13.$printable.tail convertto $enc $profile $prefix$str $prefix_bytes$bytes + testprofile cmdAH-4.4.13.$printable.middle convertto $enc $profile $prefix$str$suffix $prefix_bytes$bytes$suffix_bytes } -} -body { - # Compile and execute - encoding_test -} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertfrom ?-strict? ?-failindex var? ?encoding? data" or "::tcl::encoding::convertfrom -nocomplain ?encoding? data"} -cleanup { - rename encoding_test "" } -test cmdAH-4.18.3 {Syntax error, -failindex with no var, no encoding} -body { - encoding convertto -failindex ABC -} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertto ?-strict? ?-failindex var? ?encoding? data" or "::tcl::encoding::convertto -nocomplain ?encoding? data"} -test cmdAH-4.18.4 {Syntax error, -failindex with no var, no encoding (byte compiled)} -setup { - proc encoding_test {} { - encoding convertto -failindex ABC + +# convertto ?-profile? : invalid byte sequences +foreach {enc str profile hex failidx ctrl comment} $encUnencodableStrings { + if {"knownBug" in $ctrl} continue + set bytes [binary decode hex $hex] + set printable [printable $str] + set prefix A + set suffix B + set prefix_bytes [encoding convertto $enc $prefix] + set suffix_bytes [encoding convertto $enc $suffix] + set prefixLen [string length $prefix_bytes] + set result [list $bytes] + # TODO - if the bad byte is unprintable, tcltest errors out when printing a mismatch + # so glob it out in error message pattern for now. + set errorWithoutPrefix [list "unexpected character at index $failidx: *" -returnCodes error -match glob] + set errorWithPrefix [list "unexpected character at index [expr {$failidx+$prefixLen}]: *" -returnCodes error -match glob] + if {$ctrl eq {} || "solo" in $ctrl} { + if {$failidx == -1} { + set result [list $bytes] + } else { + set result $errorWithoutPrefix + } + testprofile cmdAH-4.4.13.$printable.solo convertto $enc $profile $str {*}$result } -} -body { - # Compile and execute - encoding_test -} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertto ?-strict? ?-failindex var? ?encoding? data" or "::tcl::encoding::convertto -nocomplain ?encoding? data"} -cleanup { - rename encoding_test "" -} -test cmdAH-4.19.1 {convertrom -failindex with correct data} -body { - encoding convertfrom -failindex test ABC - set test -} -returnCodes 0 -result -1 -test cmdAH-4.19.2 {convertrom -failindex with correct data (byt compiled)} -setup { - proc encoding_test {} { - encoding convertfrom -failindex test ABC - set test + if {$ctrl eq {} || "lead" in $ctrl} { + if {$failidx == -1} { + set result [list $bytes$suffix_bytes] + } else { + set result $errorWithoutPrefix + } + testprofile cmdAH-4.4.13.$printable.lead convertto $enc $profile $str$suffix {*}$result } -} -body { - # Compile and execute - encoding_test -} -returnCodes 0 -result -1 -cleanup { - rename encoding_test "" -} -test cmdAH-4.19.3 {convertrom -failindex with correct data} -body { - encoding convertto -failindex test ABC - set test -} -returnCodes 0 -result -1 -test cmdAH-4.19.4 {convertrom -failindex with correct data (byt compiled)} -setup { - proc encoding_test {} { - encoding convertto -failindex test ABC - set test + if {$ctrl eq {} || "tail" in $ctrl} { + if {$failidx == -1} { + set result [list $prefix_bytes$bytes] + } else { + set result $errorWithPrefix + } + testprofile cmdAH-4.4.13.$printable.tail convertto $enc $profile $prefix$str {*}$result } -} -body { - # Compile and execute - encoding_test -} -returnCodes 0 -result -1 -cleanup { - rename encoding_test "" -} -test cmdAH-4.20.1 {convertrom -failindex with incomplete utf8} -body { - set x [encoding convertfrom -failindex i utf-8 A\xc3] - binary scan $x H* y - list $y $i -} -returnCodes 0 -result {41 1} -test cmdAH-4.20.2 {convertrom -failindex with incomplete utf8 (byte compiled)} -setup { - proc encoding_test {} { - set x [encoding convertfrom -failindex i utf-8 A\xc3] - binary scan $x H* y - list $y $i + if {$ctrl eq {} || "middle" in $ctrl} { + if {$failidx == -1} { + set result [list $prefix_bytes$bytes$suffix_bytes] + } else { + set result $errorWithPrefix + } + testprofile cmdAH-4.4.13.$printable.middle convertto $enc $profile $prefix$str$suffix {*}$result } -} -body { - # Compile and execute - encoding_test -} -returnCodes 0 -result {41 1} -cleanup { - rename encoding_test "" } -test cmdAH-4.20.3 {convertrom -failindex with incomplete utf8} -body { - set x [encoding convertfrom -strict -failindex i utf-8 A\xc3] - binary scan $x H* y - list $y $i -} -returnCodes 0 -result {41 1} -test cmdAH-4.20.4 {convertrom -failindex with incomplete utf8 (byte compiled)} -setup { - proc encoding_test {} { - set x [encoding convertfrom -strict -failindex i utf-8 A\xc3] - binary scan $x H* y - list $y $i + +# convertto -failindex ?-profile? - valid data +foreach {enc str hex ctrl comment} $encValidStrings { + if {"knownBug" in $ctrl} continue + set bytes [binary decode hex $hex] + set printable [printable $str] + set prefix A + set suffix B + set prefix_bytes [encoding convertto $enc A] + set suffix_bytes [encoding convertto $enc B] + foreach profile $encProfiles { + testfailindex cmdAH-4.4.14.$enc.$printable.solo convertto $enc $str $bytes -1 $profile + testfailindex cmdAH-4.4.14.$enc.$printable.lead convertto $enc $str$suffix $bytes$suffix_bytes -1 $profile + testfailindex cmdAH-4.4.14.$enc.$printable.tail convertto $enc $prefix$str $prefix_bytes$bytes -1 $profile + testfailindex cmdAH-4.4.14.$enc.$printable.middle convertto $enc $prefix$str$suffix $prefix_bytes$bytes$suffix_bytes -1 $profile } -} -body { - # Compile and execute - encoding_test -} -returnCodes 0 -result {41 1} -cleanup { - rename encoding_test "" } -test cmdAH-4.20.5 {convertrom -failindex with incomplete utf8} -body { - set x [encoding convertfrom -failindex i -strict utf-8 A\xc3] - binary scan $x H* y - list $y $i -} -returnCodes 0 -result {41 1} -test cmdAH-4.20.6 {convertrom -failindex with incomplete utf8 (byte compiled)} -setup { - proc encoding_test {} { - set x [encoding convertfrom -failindex i -strict utf-8 A\xc3] - binary scan $x H* y - list $y $i + +# convertto -failindex ?-profile? - invalid data +foreach {enc str profile hex failidx ctrl comment} $encUnencodableStrings { + if {"knownBug" in $ctrl} continue + set bytes [binary decode hex $hex] + set printable [printable $str] + set prefix A + set suffix B + set prefixLen [string length [encoding convertto $enc $prefix]] + if {$ctrl eq {} || "solo" in $ctrl} { + testfailindex cmdAH-4.4.14.$printable.solo convertto $enc $str $bytes $failidx $profile } -} -body { - # Compile and execute - encoding_test -} -returnCodes 0 -result {41 1} -cleanup { - rename encoding_test "" -} -test cmdAH-4.21.1 {convertto -failindex with wrong character} -body { - set x [encoding convertto -failindex i iso8859-1 A\u0141] - binary scan $x H* y - list $y $i -} -returnCodes 0 -result {41 1} -test cmdAH-4.21.2 {convertto -failindex with wrong character (byte compiled)} -setup { - proc encoding_test {} { - set x [encoding convertto -failindex i iso8859-1 A\u0141] - binary scan $x H* y - list $y $i + if {$ctrl eq {} || "lead" in $ctrl} { + if {$failidx == -1} { + # If success expected + set result $bytes$suffix + } else { + # Failure expected + set result "" + } + testfailindex cmdAH-4.4.14.$printable.lead convertto $enc $str$suffix $result $failidx $profile + } + if {$ctrl eq {} || "tail" in $ctrl} { + set expected_failidx $failidx + if {$failidx == -1} { + # If success expected + set result $prefix$bytes + } else { + # Failure expected + set result $prefix + incr expected_failidx $prefixLen + } + testfailindex cmdAH-4.4.14.$printable.tail convertto $enc $prefix$str $result $expected_failidx $profile + } + if {$ctrl eq {} || "middle" in $ctrl} { + set expected_failidx $failidx + if {$failidx == -1} { + # If success expected + set result $prefix$bytes$suffix + } else { + # Failure expected + set result $prefix + incr expected_failidx $prefixLen + } + testfailindex cmdAH-4.4.14.$printable.middle convertto $enc $prefix$str$suffix $result $expected_failidx $profile } -} -body { - # Compile and execute - encoding_test -} -returnCodes 0 -result {41 1} -cleanup { - rename encoding_test "" } -test cmdAH-4.22 {convertfrom -strict} -body { - encoding convertfrom -strict utf-8 A\x00B -} -result A\x00B -test cmdAH-4.23 {convertfrom -strict} -body { - encoding convertfrom -strict utf-8 A\xC0\x80B -} -returnCodes error -result {unexpected byte sequence starting at index 1: '\xC0'} +test cmdAH-4.4.xx {convertto -profile strict} -constraints {testbytestring knownBug} -body { + # TODO - what does testbytestring even test? Invalid UTF8 in the Tcl_Obj bytes field + encoding convertto -profile strict utf-8 A[testbytestring \x80]B +} -returnCodes error -result {unexpected byte sequence starting at index 1: '\x80'} -test cmdAH-4.24 {convertto -strict} -body { - encoding convertto -strict utf-8 A\x00B -} -result A\x00B +# +# encoding names 4.5.* +badnumargs cmdAH-4.5.1 {encoding names} {foo} +test cmdAH-4.5.2 {encoding names should include at least utf-8 and iso8859-1 and at least one more} -body { + set names [encoding names] + list [expr {"utf-8" in $names}] [expr {"iso8859-1" in $names}] [expr {[llength $names] > 2}] +} -result {1 1 1} -test cmdAH-4.25 {convertfrom -strict} -constraints knownBug -body { - encoding convertfrom -strict utf-8 A\x80B -} -returnCodes error -result {unexpected byte sequence starting at index 1: '\x80'} +# +# encoding profiles 4.6.* +badnumargs cmdAH-4.6.1 {encoding profiles} {foo} +test cmdAH-4.6.2 {encoding profiles} -body { + lsort [encoding profiles] +} -result {replace strict tcl8} -test cmdAH-4.26 {convertto -strict} -constraints {testbytestring knownBug} -body { - encoding convertto -strict utf-8 A[testbytestring \x80]B -} -returnCodes error -result {unexpected byte sequence starting at index 1: '\x80'} +# +# file command test cmdAH-5.1 {Tcl_FileObjCmd} -returnCodes error -body { file diff --git a/tests/encoding.test b/tests/encoding.test index e0e1598..2deda8d 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -300,7 +300,7 @@ test encoding-11.11 {encoding: extended Unicode UTF-32} { test encoding-12.1 {LoadTableEncoding: normal encoding} { set x [encoding convertto iso8859-3 Ġ] - append x [encoding convertto -nocomplain iso8859-3 Õ] + append x [encoding convertto -profile tcl8 iso8859-3 Õ] append x [encoding convertfrom iso8859-3 Õ] } "Õ?Ġ" test encoding-12.2 {LoadTableEncoding: single-byte encoding} { @@ -339,7 +339,7 @@ test encoding-15.3 {UtfToUtfProc null character input} teststringbytes { } c080 test encoding-15.4 {UtfToUtfProc emoji character input} -body { set x \xED\xA0\xBD\xED\xB8\x82 - set y [encoding convertfrom -nocomplain utf-8 \xED\xA0\xBD\xED\xB8\x82] + set y [encoding convertfrom -profile tcl8 utf-8 \xED\xA0\xBD\xED\xB8\x82] list [string length $x] $y } -result "6 \uD83D\uDE02" test encoding-15.5 {UtfToUtfProc emoji character input} { @@ -349,67 +349,67 @@ test encoding-15.5 {UtfToUtfProc emoji character input} { } "4 😂" test encoding-15.6 {UtfToUtfProc emoji character output} utf32 { set x \uDE02\uD83D\uDE02\uD83D - set y [encoding convertto -nocomplain utf-8 \uDE02\uD83D\uDE02\uD83D] + set y [encoding convertto -profile tcl8 utf-8 \uDE02\uD83D\uDE02\uD83D] binary scan $y H* z list [string length $y] $z } {12 edb882eda0bdedb882eda0bd} test encoding-15.7 {UtfToUtfProc emoji character output} { set x \uDE02\uD83D\uD83D - set y [encoding convertto -nocomplain utf-8 \uDE02\uD83D\uD83D] + set y [encoding convertto -profile tcl8 utf-8 \uDE02\uD83D\uD83D] binary scan $y H* z list [string length $x] [string length $y] $z } {3 9 edb882eda0bdeda0bd} test encoding-15.8 {UtfToUtfProc emoji character output} { set x \uDE02\uD83Dé - set y [encoding convertto -nocomplain utf-8 \uDE02\uD83Dé] + set y [encoding convertto -profile tcl8 utf-8 \uDE02\uD83Dé] binary scan $y H* z list [string length $x] [string length $y] $z } {3 8 edb882eda0bdc3a9} test encoding-15.9 {UtfToUtfProc emoji character output} { set x \uDE02\uD83DX - set y [encoding convertto -nocomplain utf-8 \uDE02\uD83DX] + set y [encoding convertto -profile tcl8 utf-8 \uDE02\uD83DX] binary scan $y H* z list [string length $x] [string length $y] $z } {3 7 edb882eda0bd58} test encoding-15.10 {UtfToUtfProc high surrogate character output} { set x \uDE02é - set y [encoding convertto -nocomplain utf-8 \uDE02é] + set y [encoding convertto -profile tcl8 utf-8 \uDE02é] binary scan $y H* z list [string length $x] [string length $y] $z } {2 5 edb882c3a9} test encoding-15.11 {UtfToUtfProc low surrogate character output} { set x \uDA02é - set y [encoding convertto -nocomplain utf-8 \uDA02é] + set y [encoding convertto -profile tcl8 utf-8 \uDA02é] binary scan $y H* z list [string length $x] [string length $y] $z } {2 5 eda882c3a9} test encoding-15.12 {UtfToUtfProc high surrogate character output} { set x \uDE02Y - set y [encoding convertto -nocomplain utf-8 \uDE02Y] + set y [encoding convertto -profile tcl8 utf-8 \uDE02Y] binary scan $y H* z list [string length $x] [string length $y] $z } {2 4 edb88259} test encoding-15.13 {UtfToUtfProc low surrogate character output} { set x \uDA02Y - set y [encoding convertto -nocomplain utf-8 \uDA02Y] + set y [encoding convertto -profile tcl8 utf-8 \uDA02Y] binary scan $y H* z list [string length $x] [string length $y] $z } {2 4 eda88259} test encoding-15.14 {UtfToUtfProc high surrogate character output} { set x \uDE02 - set y [encoding convertto -nocomplain utf-8 \uDE02] + set y [encoding convertto -profile tcl8 utf-8 \uDE02] binary scan $y H* z list [string length $x] [string length $y] $z } {1 3 edb882} test encoding-15.15 {UtfToUtfProc low surrogate character output} { set x \uDA02 - set y [encoding convertto -nocomplain utf-8 \uDA02] + set y [encoding convertto -profile tcl8 utf-8 \uDA02] binary scan $y H* z list [string length $x] [string length $y] $z } {1 3 eda882} test encoding-15.16 {UtfToUtfProc: Invalid 4-byte UTF-8, see [ed29806ba]} { set x \xF0\xA0\xA1\xC2 - set y [encoding convertfrom -nocomplain utf-8 \xF0\xA0\xA1\xC2] + set y [encoding convertfrom -profile tcl8 utf-8 \xF0\xA0\xA1\xC2] list [string length $x] $y } "4 \xF0\xA0\xA1\xC2" test encoding-15.17 {UtfToUtfProc emoji character output} { @@ -459,20 +459,20 @@ test encoding-15.25 {UtfToUtfProc CESU-8} { test encoding-15.26 {UtfToUtfProc CESU-8} { encoding convertfrom cesu-8 \xC0\x80 } \x00 -test encoding-15.27 {UtfToUtfProc -strict CESU-8} { - encoding convertfrom -strict cesu-8 \x00 +test encoding-15.27 {UtfToUtfProc -profile strict CESU-8} { + encoding convertfrom -profile strict cesu-8 \x00 } \x00 -test encoding-15.28 {UtfToUtfProc -strict CESU-8} -body { - encoding convertfrom -strict cesu-8 \xC0\x80 +test encoding-15.28 {UtfToUtfProc -profile strict CESU-8} -body { + encoding convertfrom -profile strict cesu-8 \xC0\x80 } -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xC0'} test encoding-15.29 {UtfToUtfProc CESU-8} { encoding convertto cesu-8 \x00 } \x00 -test encoding-15.30 {UtfToUtfProc -strict CESU-8} { - encoding convertto -strict cesu-8 \x00 +test encoding-15.30 {UtfToUtfProc -profile strict CESU-8} { + encoding convertto -profile strict cesu-8 \x00 } \x00 -test encoding-15.31 {UtfToUtfProc -strict CESU-8 (bytes F0-F4 are invalid)} -body { - encoding convertfrom -strict cesu-8 \xF1\x86\x83\x9C +test encoding-15.31 {UtfToUtfProc -profile strict CESU-8 (bytes F0-F4 are invalid)} -body { + encoding convertfrom -profile strict cesu-8 \xF1\x86\x83\x9C } -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xF1'} test encoding-16.1 {Utf16ToUtfProc} -body { @@ -504,7 +504,7 @@ test encoding-16.7 {Utf32ToUtfProc} -body { list $val [format %x [scan $val %c]] } -result "乎 4e4e" test encoding-16.8 {Utf32ToUtfProc} -body { - set val [encoding convertfrom -nocomplain utf-32 \x41\x00\x00\x41] + set val [encoding convertfrom -profile tcl8 utf-32 \x41\x00\x00\x41] list $val [format %x [scan $val %c]] } -result "\uFFFD fffd" test encoding-16.9 {Utf32ToUtfProc} -constraints utf32 -body { @@ -532,7 +532,7 @@ test encoding-16.16 {Utf16ToUtfProc} -body { encoding convertfrom utf-16le \x00\xDC\x00\xD8 } -result \uDC00\uD800 test encoding-16.17 {Utf32ToUtfProc} -body { - list [encoding convertfrom -strict -failindex idx utf-32le \x41\x00\x00\x00\x00\xD8\x00\x00\x42\x00\x00\x00] [set idx] + list [encoding convertfrom -profile strict -failindex idx utf-32le \x41\x00\x00\x00\x00\xD8\x00\x00\x42\x00\x00\x00] [set idx] } -result {A 4} test encoding-16.18 { @@ -571,10 +571,10 @@ test encoding-17.2 {UtfToUcs2Proc} -body { encoding convertfrom utf-16 \xD8\xD8\xDC\xDC } -result "\U460DC" test encoding-17.3 {UtfToUtf16Proc} -body { - encoding convertto -nocomplain utf-16be "\uDCDC" + encoding convertto -profile tcl8 utf-16be "\uDCDC" } -result "\xDC\xDC" test encoding-17.4 {UtfToUtf16Proc} -body { - encoding convertto -nocomplain utf-16le "\uD8D8" + encoding convertto -profile tcl8 utf-16le "\uD8D8" } -result "\xD8\xD8" test encoding-17.5 {UtfToUtf16Proc} -body { encoding convertto utf-32le "\U460DC" @@ -583,54 +583,54 @@ test encoding-17.6 {UtfToUtf16Proc} -body { encoding convertto utf-32be "\U460DC" } -result "\x00\x04\x60\xDC" test encoding-17.7 {UtfToUtf16Proc} -body { - encoding convertto -strict utf-16be "\uDCDC" + encoding convertto -profile strict utf-16be "\uDCDC" } -returnCodes error -result {unexpected character at index 0: 'U+00DCDC'} test encoding-17.8 {UtfToUtf16Proc} -body { - encoding convertto -strict utf-16le "\uD8D8" + encoding convertto -profile strict utf-16le "\uD8D8" } -returnCodes error -result {unexpected character at index 0: 'U+00D8D8'} test encoding-17.9 {Utf32ToUtfProc} -body { - encoding convertfrom -strict utf-32 "\xFF\xFF\xFF\xFF" + encoding convertfrom -profile strict utf-32 "\xFF\xFF\xFF\xFF" } -returnCodes error -result {unexpected byte sequence starting at index 0: '\xFF'} test encoding-17.10 {Utf32ToUtfProc} -body { - encoding convertfrom -nocomplain utf-32 "\xFF\xFF\xFF\xFF" + encoding convertfrom -profile tcl8 utf-32 "\xFF\xFF\xFF\xFF" } -result \uFFFD test encoding-18.1 {TableToUtfProc on invalid input} -body { list [catch {encoding convertto jis0208 \\} res] $res } -result {1 {unexpected character at index 0: 'U+00005C'}} -test encoding-18.2 {TableToUtfProc on invalid input with -strict} -body { - list [catch {encoding convertto -strict jis0208 \\} res] $res +test encoding-18.2 {TableToUtfProc on invalid input with -profile strict} -body { + list [catch {encoding convertto -profile strict jis0208 \\} res] $res } -result {1 {unexpected character at index 0: 'U+00005C'}} -test encoding-18.3 {TableToUtfProc on invalid input with -strict -failindex} -body { - list [catch {encoding convertto -strict -failindex pos jis0208 \\} res] $res $pos +test encoding-18.3 {TableToUtfProc on invalid input with -profile strict -failindex} -body { + list [catch {encoding convertto -profile strict -failindex pos jis0208 \\} res] $res $pos } -result {0 {} 0} -test encoding-18.4 {TableToUtfProc on invalid input with -failindex -strict} -body { - list [catch {encoding convertto -failindex pos -strict jis0208 \\} res] $res $pos +test encoding-18.4 {TableToUtfProc on invalid input with -failindex -profile strict} -body { + list [catch {encoding convertto -failindex pos -profile strict jis0208 \\} res] $res $pos } -result {0 {} 0} test encoding-18.5 {TableToUtfProc on invalid input with -failindex} -body { list [catch {encoding convertto -failindex pos jis0208 \\} res] $res $pos -} -result {0 {} 0} -test encoding-18.6 {TableToUtfProc on invalid input with -nocomplain} -body { - list [catch {encoding convertto -nocomplain jis0208 \\} res] $res +} -result {0 !) -1} +test encoding-18.6 {TableToUtfProc on invalid input with -profile tcl8} -body { + list [catch {encoding convertto -profile tcl8 jis0208 \\} res] $res } -result {0 !)} test encoding-19.1 {TableFromUtfProc} -body { encoding convertfrom ascii AÁ } -result AÁ test encoding-19.2 {TableFromUtfProc} -body { - encoding convertfrom -nocomplain ascii AÁ + encoding convertfrom -profile tcl8 ascii AÁ } -result AÁ test encoding-19.3 {TableFromUtfProc} -body { - encoding convertfrom -strict ascii AÁ + encoding convertfrom -profile strict ascii AÁ } -returnCodes 1 -result {unexpected byte sequence starting at index 1: '\xC1'} test encoding-19.4 {TableFromUtfProc} -body { list [encoding convertfrom -failindex idx ascii AÁ] [set idx] -} -result {A 1} +} -result [list A\xC1 -1] test encoding-19.5 {TableFromUtfProc} -body { - list [encoding convertfrom -failindex idx -strict ascii AÁ] [set idx] + list [encoding convertfrom -failindex idx -profile strict ascii A\xC1] [set idx] } -result {A 1} test encoding-19.6 {TableFromUtfProc} -body { - list [encoding convertfrom -failindex idx -strict ascii AÁB] [set idx] + list [encoding convertfrom -failindex idx -profile strict ascii AÁB] [set idx] } -result {A 1} test encoding-20.1 {TableFreefProc} { @@ -743,31 +743,31 @@ test encoding-24.4 {Parse valid or invalid utf-8} { string length [encoding convertfrom utf-8 "\xC0\x80"] } 1 test encoding-24.5 {Parse valid or invalid utf-8} { - string length [encoding convertfrom -nocomplain utf-8 "\xC0\x81"] + string length [encoding convertfrom -profile tcl8 utf-8 "\xC0\x81"] } 2 test encoding-24.6 {Parse valid or invalid utf-8} { - string length [encoding convertfrom -nocomplain utf-8 "\xC1\xBF"] + string length [encoding convertfrom -profile tcl8 utf-8 "\xC1\xBF"] } 2 test encoding-24.7 {Parse valid or invalid utf-8} { string length [encoding convertfrom utf-8 "\xC2\x80"] } 1 test encoding-24.8 {Parse valid or invalid utf-8} { - string length [encoding convertfrom -nocomplain utf-8 "\xE0\x80\x80"] + string length [encoding convertfrom -profile tcl8 utf-8 "\xE0\x80\x80"] } 3 test encoding-24.9 {Parse valid or invalid utf-8} { - string length [encoding convertfrom -nocomplain utf-8 "\xE0\x9F\xBF"] + string length [encoding convertfrom -profile tcl8 utf-8 "\xE0\x9F\xBF"] } 3 test encoding-24.10 {Parse valid or invalid utf-8} { string length [encoding convertfrom utf-8 "\xE0\xA0\x80"] } 1 test encoding-24.11 {Parse valid or invalid utf-8} { - string length [encoding convertfrom -nocomplain utf-8 "\xEF\xBF\xBF"] + string length [encoding convertfrom -profile tcl8 utf-8 "\xEF\xBF\xBF"] } 1 test encoding-24.12 {Parse valid or invalid utf-8} -body { - encoding convertfrom -strict utf-8 "\xC0\x81" + encoding convertfrom -profile strict utf-8 "\xC0\x81" } -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xC0'} test encoding-24.13 {Parse valid or invalid utf-8} -body { - encoding convertfrom -strict utf-8 "\xC1\xBF" + encoding convertfrom -profile strict utf-8 "\xC1\xBF" } -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xC1'} test encoding-24.14 {Parse valid or invalid utf-8} { string length [encoding convertfrom utf-8 "\xC2\x80"] @@ -787,83 +787,83 @@ test encoding-24.18 {Parse valid or invalid utf-8} -constraints testbytestring - test encoding-24.19 {Parse valid or invalid utf-8} -body { encoding convertto utf-8 "ZX\uD800" } -returnCodes 1 -match glob -result "unexpected character at index 2: 'U+00D800'" -test encoding-24.20 {Parse with -nocomplain but without providing encoding} { - string length [encoding convertfrom -nocomplain "\x20"] -} 1 -test encoding-24.21 {Parse with -nocomplain but without providing encoding} { - string length [encoding convertto -nocomplain "\x20"] -} 1 +test encoding-24.20 {Parse with -profile tcl8 but without providing encoding} -body { + encoding convertfrom -profile tcl8 "\x20" +} -result {wrong # args: should be "::tcl::encoding::convertfrom ?-profile profile? ?-failindex var? encoding data" or "::tcl::encoding::convertfrom data"} -returnCodes error +test encoding-24.21 {Parse with -profile tcl8 but without providing encoding} -body { + string length [encoding convertto -profile tcl8 "\x20"] +} -result {wrong # args: should be "::tcl::encoding::convertto ?-profile profile? ?-failindex var? encoding data" or "::tcl::encoding::convertto data"} -returnCodes error test encoding-24.22 {Syntax error, two encodings} -body { encoding convertfrom iso8859-1 utf-8 "ZX\uD800" -} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertfrom ?-strict? ?-failindex var? ?encoding? data" or "::tcl::encoding::convertfrom -nocomplain ?encoding? data"} +} -result {bad option "iso8859-1": must be -profile or -failindex} -returnCodes error test encoding-24.23 {Syntax error, two encodings} -body { encoding convertto iso8859-1 utf-8 "ZX\uD800" -} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertto ?-strict? ?-failindex var? ?encoding? data" or "::tcl::encoding::convertto -nocomplain ?encoding? data"} -test encoding-24.24 {Parse invalid utf-8 with -strict} -body { - encoding convertfrom -strict utf-8 "\xC0\x80\x00\x00" +} -result {bad option "iso8859-1": must be -profile or -failindex} -returnCodes error +test encoding-24.24 {Parse invalid utf-8 with -profile strict} -body { + encoding convertfrom -profile strict utf-8 "\xC0\x80\x00\x00" } -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xC0'} -test encoding-24.25 {Parse invalid utf-8 with -strict} -body { - encoding convertfrom -strict utf-8 "\x40\x80\x00\x00" +test encoding-24.25 {Parse invalid utf-8 with -profile strict} -body { + encoding convertfrom -profile strict utf-8 "\x40\x80\x00\x00" } -returnCodes 1 -result {unexpected byte sequence starting at index 1: '\x80'} -test encoding-24.26 {Parse valid utf-8 with -strict} -body { - encoding convertfrom -strict utf-8 "\xF1\x80\x80\x80" +test encoding-24.26 {Parse valid utf-8 with -profile strict} -body { + encoding convertfrom -profile strict utf-8 "\xF1\x80\x80\x80" } -result \U40000 -test encoding-24.27 {Parse invalid utf-8 with -strict} -body { - encoding convertfrom -strict utf-8 "\xF0\x80\x80\x80" +test encoding-24.27 {Parse invalid utf-8 with -profile strict} -body { + encoding convertfrom -profile strict utf-8 "\xF0\x80\x80\x80" } -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xF0'} -test encoding-24.28 {Parse invalid utf-8 with -strict} -body { - encoding convertfrom -strict utf-8 "\xFF\x00\x00" +test encoding-24.28 {Parse invalid utf-8 with -profile strict} -body { + encoding convertfrom -profile strict utf-8 "\xFF\x00\x00" } -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xFF'} test encoding-24.29 {Parse invalid utf-8} -body { encoding convertfrom utf-8 \xEF\xBF\xBF } -result \uFFFF -test encoding-24.30 {Parse noncharacter with -strict} -body { - encoding convertfrom -strict utf-8 \xEF\xBF\xBF +test encoding-24.30 {Parse noncharacter with -profile strict} -body { + encoding convertfrom -profile strict utf-8 \xEF\xBF\xBF } -result \uFFFF -test encoding-24.31 {Parse invalid utf-8 with -nocomplain} -body { - encoding convertfrom -nocomplain utf-8 \xEF\xBF\xBF +test encoding-24.31 {Parse invalid utf-8 with -profile tcl8} -body { + encoding convertfrom -profile tcl8 utf-8 \xEF\xBF\xBF } -result \uFFFF test encoding-24.32 {Try to generate invalid utf-8} -body { encoding convertto utf-8 \uFFFF } -result \xEF\xBF\xBF -test encoding-24.33 {Try to generate noncharacter with -strict} -body { - encoding convertto -strict utf-8 \uFFFF +test encoding-24.33 {Try to generate noncharacter with -profile strict} -body { + encoding convertto -profile strict utf-8 \uFFFF } -result \xEF\xBF\xBF -test encoding-24.34 {Try to generate invalid utf-8 with -nocomplain} -body { - encoding convertto -nocomplain utf-8 \uFFFF +test encoding-24.34 {Try to generate invalid utf-8 with -profile tcl8} -body { + encoding convertto -profile tcl8 utf-8 \uFFFF } -result \xEF\xBF\xBF test encoding-24.35 {Parse invalid utf-8} -constraints utf32 -body { encoding convertfrom utf-8 \xED\xA0\x80 } -result \uD800 -test encoding-24.36 {Parse invalid utf-8 with -strict} -body { - encoding convertfrom -strict utf-8 \xED\xA0\x80 +test encoding-24.36 {Parse invalid utf-8 with -profile strict} -body { + encoding convertfrom -profile strict utf-8 \xED\xA0\x80 } -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xED'} -test encoding-24.37 {Parse invalid utf-8 with -nocomplain} -body { - encoding convertfrom -nocomplain utf-8 \xED\xA0\x80 +test encoding-24.37 {Parse invalid utf-8 with -profile tcl8} -body { + encoding convertfrom -profile tcl8 utf-8 \xED\xA0\x80 } -result \uD800 test encoding-24.38 {Try to generate invalid utf-8} -body { encoding convertto utf-8 \uD800 } -returnCodes 1 -result {unexpected character at index 0: 'U+00D800'} -test encoding-24.39 {Try to generate invalid utf-8 with -strict} -body { - encoding convertto -strict utf-8 \uD800 +test encoding-24.39 {Try to generate invalid utf-8 with -profile strict} -body { + encoding convertto -profile strict utf-8 \uD800 } -returnCodes 1 -result {unexpected character at index 0: 'U+00D800'} -test encoding-24.40 {Try to generate invalid utf-8 with -nocomplain} -body { - encoding convertto -nocomplain utf-8 \uD800 +test encoding-24.40 {Try to generate invalid utf-8 with -profile tcl8} -body { + encoding convertto -profile tcl8 utf-8 \uD800 } -result \xED\xA0\x80 -test encoding-24.41 {Parse invalid utf-8 with -strict} -body { - encoding convertfrom -strict utf-8 \xED\xA0\x80\xED\xB0\x80 +test encoding-24.41 {Parse invalid utf-8 with -profile strict} -body { + encoding convertfrom -profile strict utf-8 \xED\xA0\x80\xED\xB0\x80 } -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xED'} test encoding-24.42 {Parse invalid utf-8, fallback to cp1252 [885c86a9a0]} -body { - encoding convertfrom -nocomplain utf-8 \xF0\x80\x80\x80 + encoding convertfrom -profile tcl8 utf-8 \xF0\x80\x80\x80 } -result \xF0\u20AC\u20AC\u20AC test encoding-24.43 {Parse invalid utf-8, fallback to cp1252 [885c86a9a0]} -body { - encoding convertfrom -nocomplain utf-8 \x80 + encoding convertfrom -profile tcl8 utf-8 \x80 } -result \u20AC -test encoding-24.44 {Try to generate invalid ucs-2 with -strict} -body { - encoding convertto -strict ucs-2 \uD800 +test encoding-24.44 {Try to generate invalid ucs-2 with -profile strict} -body { + encoding convertto -profile strict ucs-2 \uD800 } -returnCodes 1 -result {unexpected character at index 0: 'U+00D800'} -test encoding-24.45 {Try to generate invalid ucs-2 with -strict} -body { - encoding convertto -strict ucs-2 \U10000 +test encoding-24.45 {Try to generate invalid ucs-2 with -profile strict} -body { + encoding convertto -profile strict ucs-2 \U10000 } -returnCodes 1 -result {unexpected character at index 0: 'U+010000'} file delete [file join [temporaryDirectory] iso2022.txt] @@ -1022,7 +1022,7 @@ test encoding-28.0 {all encodings load} -body { if {$name ne "unicode"} { incr count } - encoding convertto -nocomplain $name $string + encoding convertto -profile tcl8 $name $string # discard the cached internal representation of Tcl_Encoding # Unfortunately, without this, encoding 2-1 fails. diff --git a/tests/io.test b/tests/io.test index cb1c691..b0142dd 100644 --- a/tests/io.test +++ b/tests/io.test @@ -272,7 +272,7 @@ test io-3.4 {WriteChars: loop over stage buffer} -body { # stage buffer maps to more than can be queued at once. set f [open $path(test1) w] - fconfigure $f -encoding jis0208 -buffersize 16 -nocomplainencoding 1 + fconfigure $f -encoding jis0208 -buffersize 16 -encodingprofile tcl8 puts -nonewline $f "\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\" set x [list [contents $path(test1)]] close $f @@ -286,7 +286,7 @@ test io-3.5 {WriteChars: saved != 0} -body { # requested buffersize. set f [open $path(test1) w] - fconfigure $f -encoding jis0208 -buffersize 17 -nocomplainencoding 1 + fconfigure $f -encoding jis0208 -buffersize 17 -encodingprofile tcl8 puts -nonewline $f "\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\" set x [list [contents $path(test1)]] close $f @@ -319,7 +319,7 @@ test io-3.7 {WriteChars: (bufPtr->nextAdded > bufPtr->length)} -body { # of the next channel buffer. set f [open $path(test1) w] - fconfigure $f -encoding jis0208 -buffersize 17 -nocomplainencoding 1 + fconfigure $f -encoding jis0208 -buffersize 17 -encodingprofile tcl8 puts -nonewline $f "\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\" set x [list [contents $path(test1)]] close $f @@ -7622,7 +7622,7 @@ test io-52.20 {TclCopyChannel & encodings} -setup { set out [open $path(kyrillic.txt) w] # Using "-encoding ascii" means reading the "Á" gives an error - fconfigure $in -encoding ascii -strictencoding 1 + fconfigure $in -encoding ascii -encodingprofile strict fconfigure $out -encoding koi8-r -translation lf fcopy $in $out @@ -7644,7 +7644,7 @@ test io-52.21 {TclCopyChannel & encodings} -setup { # Using "-encoding ascii" means writing the "Á" gives an error fconfigure $in -encoding utf-8 - fconfigure $out -encoding ascii -translation lf -strictencoding 1 + fconfigure $out -encoding ascii -translation lf -encodingprofile strict fcopy $in $out } -cleanup { @@ -7664,7 +7664,7 @@ test io-52.22 {TclCopyChannel & encodings} -setup { set out [open $path(kyrillic.txt) w] # Using "-encoding ascii" means reading the "Á" gives an error - fconfigure $in -encoding ascii -strictencoding 1 + fconfigure $in -encoding ascii -encodingprofile strict fconfigure $out -encoding koi8-r -translation lf proc ::xxx args { set ::s0 $args @@ -7692,7 +7692,7 @@ test io-52.23 {TclCopyChannel & encodings} -setup { # Using "-encoding ascii" means writing the "Á" gives an error fconfigure $in -encoding utf-8 - fconfigure $out -encoding ascii -translation lf -strictencoding 1 + fconfigure $out -encoding ascii -translation lf -encodingprofile strict proc ::xxx args { set ::s0 $args } @@ -9058,7 +9058,7 @@ test io-75.1 {multibyte encoding error read results in raw bytes (-nocomplainenc puts -nonewline $f A\xC0\x40 flush $f seek $f 0 - fconfigure $f -encoding utf-8 -nocomplainencoding 1 -buffering none + fconfigure $f -encoding utf-8 -encodingprofile tcl8 -buffering none } -body { set d [read $f] binary scan $d H* hd @@ -9068,10 +9068,10 @@ test io-75.1 {multibyte encoding error read results in raw bytes (-nocomplainenc removeFile io-75.1 } -result 41c040 -test io-75.2 {unrepresentable character write passes and is replaced by ? (-nocomplainencoding 1)} -setup { +test io-75.2 {unrepresentable character write passes and is replaced by ? (-encodingprofile tcl8)} -setup { set fn [makeFile {} io-75.2] set f [open $fn w+] - fconfigure $f -encoding iso8859-1 -nocomplainencoding 1 + fconfigure $f -encoding iso8859-1 -encodingprofile tcl8 } -body { puts -nonewline $f A\u2022 flush $f @@ -9085,14 +9085,14 @@ test io-75.2 {unrepresentable character write passes and is replaced by ? (-noco # Incomplete sequence test. # This error may IMHO only be detected with the close. # But the read already returns the incomplete sequence. -test io-75.3 {incomplete multibyte encoding read is ignored (-nocomplainencoding 1)} -setup { +test io-75.3 {incomplete multibyte encoding read is ignored (-encodingprofile tcl8)} -setup { set fn [makeFile {} io-75.3] set f [open $fn w+] fconfigure $f -encoding binary puts -nonewline $f "A\xC0" flush $f seek $f 0 - fconfigure $f -encoding utf-8 -buffering none -nocomplainencoding 1 + fconfigure $f -encoding utf-8 -buffering none -encodingprofile tcl8 } -body { set d [read $f] close $f @@ -9104,7 +9104,7 @@ test io-75.3 {incomplete multibyte encoding read is ignored (-nocomplainencoding # As utf-8 has a special treatment in multi-byte decoding, also test another # one. -test io-75.4 {shiftjis encoding error read results in raw bytes (-nocomplainencoding 1)} -setup { +test io-75.4 {shiftjis encoding error read results in raw bytes (-encodingprofile tcl8)} -setup { set fn [makeFile {} io-75.4] set f [open $fn w+] fconfigure $f -encoding binary @@ -9113,7 +9113,7 @@ test io-75.4 {shiftjis encoding error read results in raw bytes (-nocomplainenco puts -nonewline $f A\x81\xFFA flush $f seek $f 0 - fconfigure $f -encoding shiftjis -buffering none -eofchar "" -translation lf -nocomplainencoding 1 + fconfigure $f -encoding shiftjis -buffering none -eofchar "" -translation lf -encodingprofile tcl8 } -body { set d [read $f] binary scan $d H* hd @@ -9123,14 +9123,14 @@ test io-75.4 {shiftjis encoding error read results in raw bytes (-nocomplainenco removeFile io-75.4 } -result 4181ff41 -test io-75.5 {invalid utf-8 encoding read is ignored (-nocomplainencoding 1)} -setup { +test io-75.5 {invalid utf-8 encoding read is ignored (-encodingprofile tcl8)} -setup { set fn [makeFile {} io-75.5] set f [open $fn w+] fconfigure $f -encoding binary puts -nonewline $f A\x81 flush $f seek $f 0 - fconfigure $f -encoding utf-8 -buffering none -eofchar "" -translation lf -nocomplainencoding 1 + fconfigure $f -encoding utf-8 -buffering none -eofchar "" -translation lf -encodingprofile tcl8 } -body { set d [read $f] close $f @@ -9140,7 +9140,7 @@ test io-75.5 {invalid utf-8 encoding read is ignored (-nocomplainencoding 1)} -s removeFile io-75.5 } -result 4181 -test io-75.6 {invalid utf-8 encoding read is not ignored (-strictencoding 1)} -setup { +test io-75.6 {invalid utf-8 encoding read is not ignored (-encodingprofile strict)} -setup { set fn [makeFile {} io-75.6] set f [open $fn w+] fconfigure $f -encoding binary @@ -9148,7 +9148,7 @@ test io-75.6 {invalid utf-8 encoding read is not ignored (-strictencoding 1)} -s puts -nonewline $f A\x81 flush $f seek $f 0 - fconfigure $f -encoding utf-8 -buffering none -eofchar "" -translation lf -strictencoding 1 + fconfigure $f -encoding utf-8 -buffering none -eofchar "" -translation lf -encodingprofile strict } -body { set d [read $f] binary scan $d H* hd @@ -9159,7 +9159,7 @@ test io-75.6 {invalid utf-8 encoding read is not ignored (-strictencoding 1)} -s removeFile io-75.6 } -match glob -result {41 1 {error reading "*": illegal byte sequence}} -test io-75.7 {invalid utf-8 encoding eof handling (-strictencoding 1)} -setup { +test io-75.7 {invalid utf-8 encoding eof handling (-encodingprofile strict)} -setup { set fn [makeFile {} io-75.7] set f [open $fn w+] fconfigure $f -encoding binary @@ -9167,7 +9167,7 @@ test io-75.7 {invalid utf-8 encoding eof handling (-strictencoding 1)} -setup { puts -nonewline $f A\xA1\x1A flush $f seek $f 0 - fconfigure $f -encoding utf-8 -buffering none -eofchar \x1A -translation lf -strictencoding 1 + fconfigure $f -encoding utf-8 -buffering none -eofchar \x1A -translation lf -encodingprofile strict } -body { set d [read $f] binary scan $d H* hd @@ -9182,7 +9182,7 @@ test io-75.7 {invalid utf-8 encoding eof handling (-strictencoding 1)} -setup { removeFile io-75.7 } -match glob -result {41 0 1 {error reading "*": illegal byte sequence} ¡} -test io-75.8 {invalid utf-8 encoding eof handling (-strictencoding 1)} -setup { +test io-75.8 {invalid utf-8 encoding eof handling (-encodingprofile strict)} -setup { set fn [makeFile {} io-75.8] set f [open $fn w+] fconfigure $f -encoding binary @@ -9190,7 +9190,7 @@ test io-75.8 {invalid utf-8 encoding eof handling (-strictencoding 1)} -setup { puts -nonewline $f A\x1A\x81 flush $f seek $f 0 - fconfigure $f -encoding utf-8 -buffering none -eofchar \x1A -translation lf -strictencoding 1 + fconfigure $f -encoding utf-8 -buffering none -eofchar \x1A -translation lf -encodingprofile strict } -body { set d [read $f] binary scan $d H* hd @@ -9205,7 +9205,7 @@ test io-75.8 {invalid utf-8 encoding eof handling (-strictencoding 1)} -setup { test io-75.9 {unrepresentable character write passes and is replaced by ?} -setup { set fn [makeFile {} io-75.9] set f [open $fn w+] - fconfigure $f -encoding iso8859-1 -strictencoding 1 + fconfigure $f -encoding iso8859-1 -encodingprofile strict } -body { catch {puts -nonewline $f "A\u2022"} msg flush $f @@ -9249,7 +9249,7 @@ test io-75.11 {shiftjis encoding error read results in raw bytes} -setup { puts -nonewline $f A\x81\xFFA flush $f seek $f 0 - fconfigure $f -encoding shiftjis -buffering none -eofchar "" -translation lf -strictencoding 1 + fconfigure $f -encoding shiftjis -buffering none -eofchar "" -translation lf -encodingprofile strict } -body { set d [read $f] binary scan $d H* hd @@ -9276,7 +9276,7 @@ test io-75.12 {invalid utf-8 encoding read is ignored} -setup { } -cleanup { removeFile io-75.12 } -result 4181 -test io-75.13 {invalid utf-8 encoding read is not ignored (-strictencoding 1)} -setup { +test io-75.13 {invalid utf-8 encoding read is not ignored (-encodingprofile strict)} -setup { set fn [makeFile {} io-75.13] set f [open $fn w+] fconfigure $f -encoding binary @@ -9284,7 +9284,7 @@ test io-75.13 {invalid utf-8 encoding read is not ignored (-strictencoding 1)} - puts -nonewline $f "A\x81" flush $f seek $f 0 - fconfigure $f -encoding utf-8 -buffering none -eofchar "" -translation lf -strictencoding 1 + fconfigure $f -encoding utf-8 -buffering none -eofchar "" -translation lf -encodingprofile strict } -body { set d [read $f] binary scan $d H* hd diff --git a/tests/ioCmd.test b/tests/ioCmd.test index 41abfb9..9e28569 100644 --- a/tests/ioCmd.test +++ b/tests/ioCmd.test @@ -207,7 +207,7 @@ test iocmd-7.5 {close command} -setup { proc expectedOpts {got extra} { set basicOpts { - -blocking -buffering -buffersize -encoding -eofchar -nocomplainencoding -strictencoding -translation + -blocking -buffering -buffersize -encoding -encodingprofile -eofchar -translation } set opts [list {*}$basicOpts {*}$extra] lset opts end [string cat "or " [lindex $opts end]] @@ -244,19 +244,19 @@ test iocmd-8.7 {fconfigure command} -setup { fconfigure $f1 } -cleanup { catch {close $f1} -} -result {-blocking 1 -buffering full -buffersize 4096 -encoding utf-16 -eofchar {} -nocomplainencoding 0 -strictencoding 0 -translation lf} +} -result {-blocking 1 -buffering full -buffersize 4096 -encoding utf-16 -encodingprofile tcl8 -eofchar {} -translation lf} test iocmd-8.8 {fconfigure command} -setup { file delete $path(test1) set x {} } -body { set f1 [open $path(test1) w] fconfigure $f1 -translation lf -buffering line -buffersize 3030 \ - -eofchar {} -encoding utf-16 + -eofchar {} -encoding utf-16 -encodingprofile tcl8 lappend x [fconfigure $f1 -buffering] lappend x [fconfigure $f1] } -cleanup { catch {close $f1} -} -result {line {-blocking 1 -buffering line -buffersize 3030 -encoding utf-16 -eofchar {} -nocomplainencoding 0 -strictencoding 0 -translation lf}} +} -result {line {-blocking 1 -buffering line -buffersize 3030 -encoding utf-16 -encodingprofile tcl8 -eofchar {} -translation lf}} test iocmd-8.9 {fconfigure command} -setup { file delete $path(test1) } -body { @@ -266,7 +266,7 @@ test iocmd-8.9 {fconfigure command} -setup { fconfigure $f1 } -cleanup { catch {close $f1} -} -result {-blocking 1 -buffering none -buffersize 4040 -encoding binary -eofchar {} -nocomplainencoding 0 -strictencoding 0 -translation lf} +} -result {-blocking 1 -buffering none -buffersize 4040 -encoding binary -encodingprofile tcl8 -eofchar {} -translation lf} test iocmd-8.10 {fconfigure command} -returnCodes error -body { fconfigure a b } -result {can not find channel named "a"} @@ -1368,7 +1368,7 @@ test iocmd-25.1 {chan configure, cgetall, standard options} -match glob -body { close $c rename foo {} set res -} -result {{-blocking 1 -buffering full -buffersize 4096 -encoding * -eofchar {} -nocomplainencoding 0 -strictencoding 0 -translation {auto *}}} +} -result {{-blocking 1 -buffering full -buffersize 4096 -encoding * -encodingprofile * -eofchar {} -translation {auto *}}} test iocmd-25.2 {chan configure, cgetall, no options} -match glob -body { set res {} proc foo {args} {oninit cget cgetall; onfinal; track; return ""} @@ -1377,7 +1377,7 @@ test iocmd-25.2 {chan configure, cgetall, no options} -match glob -body { close $c rename foo {} set res -} -result {{cgetall rc*} {-blocking 1 -buffering full -buffersize 4096 -encoding * -eofchar {} -nocomplainencoding 0 -strictencoding 0 -translation {auto *}}} +} -result {{cgetall rc*} {-blocking 1 -buffering full -buffersize 4096 -encoding * -encodingprofile * -eofchar {} -translation {auto *}}} test iocmd-25.3 {chan configure, cgetall, regular result} -match glob -body { set res {} proc foo {args} { @@ -1389,7 +1389,7 @@ test iocmd-25.3 {chan configure, cgetall, regular result} -match glob -body { close $c rename foo {} set res -} -result {{cgetall rc*} {-blocking 1 -buffering full -buffersize 4096 -encoding * -eofchar {} -nocomplainencoding 0 -strictencoding 0 -translation {auto *} -bar foo -snarf x}} +} -result {{cgetall rc*} {-blocking 1 -buffering full -buffersize 4096 -encoding * -encodingprofile * -eofchar {} -translation {auto *} -bar foo -snarf x}} test iocmd-25.4 {chan configure, cgetall, bad result, list of uneven length} -match glob -body { set res {} proc foo {args} { diff --git a/tests/safe.test b/tests/safe.test index 7b73eb2..d81da0a 100644 --- a/tests/safe.test +++ b/tests/safe.test @@ -1473,7 +1473,7 @@ test safe-11.7 {testing safe encoding} -setup { interp eval $i encoding convertfrom } -returnCodes error -cleanup { safe::interpDelete $i -} -result {wrong # args: should be "encoding convertfrom ?-strict? ?-failindex var? ?encoding? data" or "encoding convertfrom -nocomplain ?encoding? data"} +} -result {wrong # args: should be "encoding convertfrom ?-profile profile? ?-failindex var? encoding data" or "encoding convertfrom data"} test safe-11.7.1 {testing safe encoding} -setup { set i [safe::interpCreate] } -body { @@ -1482,7 +1482,7 @@ test safe-11.7.1 {testing safe encoding} -setup { } -match glob -cleanup { unset -nocomplain m o safe::interpDelete $i -} -result {wrong # args: should be "encoding convertfrom ?-strict? ?-failindex var? ?encoding? data" or "encoding convertfrom -nocomplain ?encoding? data" +} -result {wrong # args: should be "encoding convertfrom ?-profile profile? ?-failindex var? encoding data" or "encoding convertfrom data" while executing "encoding convertfrom" invoked from within @@ -1495,7 +1495,7 @@ test safe-11.8 {testing safe encoding} -setup { interp eval $i encoding convertto } -returnCodes error -cleanup { safe::interpDelete $i -} -result {wrong # args: should be "encoding convertto ?-strict? ?-failindex var? ?encoding? data" or "encoding convertto -nocomplain ?encoding? data"} +} -result {wrong # args: should be "encoding convertto ?-profile profile? ?-failindex var? encoding data" or "encoding convertto data"} test safe-11.8.1 {testing safe encoding} -setup { set i [safe::interpCreate] } -body { @@ -1504,7 +1504,7 @@ test safe-11.8.1 {testing safe encoding} -setup { } -match glob -cleanup { unset -nocomplain m o safe::interpDelete $i -} -result {wrong # args: should be "encoding convertto ?-strict? ?-failindex var? ?encoding? data" or "encoding convertto -nocomplain ?encoding? data" +} -result {wrong # args: should be "encoding convertto ?-profile profile? ?-failindex var? encoding data" or "encoding convertto data" while executing "encoding convertto" invoked from within diff --git a/tests/socket.test b/tests/socket.test index a0fe2f7..b1435be 100644 --- a/tests/socket.test +++ b/tests/socket.test @@ -1071,7 +1071,7 @@ test socket_$af-7.3 {testing socket specific options} -constraints [list socket close $s update llength $l -} -result 22 +} -result 20 test socket_$af-7.4 {testing socket specific options} -constraints [list socket supported_$af] -setup { set timer [after 10000 "set x timed_out"] set l "" diff --git a/tests/winConsole.test b/tests/winConsole.test index b04f3e9..62dfbf3 100644 --- a/tests/winConsole.test +++ b/tests/winConsole.test @@ -198,7 +198,7 @@ test console-fconfigure-get-1.0 { Console get stdin configuration } -constraints {win interactive} -body { lsort [dict keys [fconfigure stdin]] -} -result {-blocking -buffering -buffersize -encoding -eofchar -inputmode -translation} +} -result {-blocking -buffering -buffersize -encoding -encodingprofile -eofchar -inputmode -translation} set testnum 0 foreach {opt result} { @@ -224,7 +224,7 @@ test console-fconfigure-get-1.[incr testnum] { fconfigure -winsize } -constraints {win interactive} -body { fconfigure stdin -winsize -} -result {bad option "-winsize": should be one of -blocking, -buffering, -buffersize, -encoding, -eofchar, -nocomplainencoding, -strictencoding, -translation, or -inputmode} -returnCodes error +} -result {bad option "-winsize": should be one of -blocking, -buffering, -buffersize, -encoding, -encodingprofile, -eofchar, -translation, or -inputmode} -returnCodes error ## fconfigure get stdout/stderr foreach chan {stdout stderr} major {2 3} { @@ -232,7 +232,7 @@ foreach chan {stdout stderr} major {2 3} { win interactive } -body { lsort [dict keys [fconfigure $chan]] - } -result {-blocking -buffering -buffersize -encoding -eofchar -translation -winsize} + } -result {-blocking -buffering -buffersize -encoding -encodingprofile -eofchar -translation -winsize} set testnum 0 foreach {opt result} { -blocking 1 @@ -260,7 +260,7 @@ foreach chan {stdout stderr} major {2 3} { fconfigure -inputmode } -constraints {win interactive} -body { fconfigure $chan -inputmode - } -result {bad option "-inputmode": should be one of -blocking, -buffering, -buffersize, -encoding, -eofchar, -nocomplainencoding, -strictencoding, -translation, or -winsize} -returnCodes error + } -result {bad option "-inputmode": should be one of -blocking, -buffering, -buffersize, -encoding, -encodingprofile, -eofchar, -translation, or -winsize} -returnCodes error } @@ -330,7 +330,7 @@ test console-fconfigure-set-1.3 { fconfigure stdin -winsize } -constraints {win interactive} -body { fconfigure stdin -winsize {10 30} -} -result {bad option "-winsize": should be one of -blocking, -buffering, -buffersize, -encoding, -eofchar, -nocomplainencoding, -strictencoding, -translation, or -inputmode} -returnCodes error +} -result {bad option "-winsize": should be one of -blocking, -buffering, -buffersize, -encoding, -encodingprofile, -eofchar, -translation, or -inputmode} -returnCodes error ## fconfigure set stdout,stderr @@ -338,13 +338,13 @@ test console-fconfigure-set-2.0 { fconfigure stdout -winsize } -constraints {win interactive} -body { fconfigure stdout -winsize {10 30} -} -result {bad option "-winsize": should be one of -blocking, -buffering, -buffersize, -encoding, -eofchar, -nocomplainencoding, -strictencoding, or -translation} -returnCodes error +} -result {bad option "-winsize": should be one of -blocking, -buffering, -buffersize, -encoding, -encodingprofile, -eofchar, or -translation} -returnCodes error test console-fconfigure-set-3.0 { fconfigure stderr -winsize } -constraints {win interactive} -body { fconfigure stderr -winsize {10 30} -} -result {bad option "-winsize": should be one of -blocking, -buffering, -buffersize, -encoding, -eofchar, -nocomplainencoding, -strictencoding, or -translation} -returnCodes error +} -result {bad option "-winsize": should be one of -blocking, -buffering, -buffersize, -encoding, -encodingprofile, -eofchar, or -translation} -returnCodes error # Multiple threads diff --git a/tests/zlib.test b/tests/zlib.test index 1c9514d..7e11634 100644 --- a/tests/zlib.test +++ b/tests/zlib.test @@ -292,7 +292,7 @@ test zlib-8.6 {transformation and fconfigure} -setup { } -cleanup { catch {close $fd} removeFile $file -} -result {{-blocking 1 -buffering full -buffersize 4096 -encoding binary -eofchar {} -nocomplainencoding 0 -strictencoding 0 -translation lf} {-blocking 1 -buffering full -buffersize 4096 -encoding binary -eofchar {} -nocomplainencoding 0 -strictencoding 0 -translation lf -checksum 1 -dictionary {}} {-blocking 1 -buffering full -buffersize 4096 -encoding binary -eofchar {} -nocomplainencoding 0 -strictencoding 0 -translation lf}} +} -result {{-blocking 1 -buffering full -buffersize 4096 -encoding binary -encodingprofile tcl8 -eofchar {} -translation lf} {-blocking 1 -buffering full -buffersize 4096 -encoding binary -encodingprofile tcl8 -eofchar {} -translation lf -checksum 1 -dictionary {}} {-blocking 1 -buffering full -buffersize 4096 -encoding binary -encodingprofile tcl8 -eofchar {} -translation lf}} test zlib-8.7 {transformation and fconfigure} -setup { set file [makeFile {} test.gz] set fd [open $file wb] @@ -302,7 +302,7 @@ test zlib-8.7 {transformation and fconfigure} -setup { } -cleanup { catch {close $fd} removeFile $file -} -result {{-blocking 1 -buffering full -buffersize 4096 -encoding binary -eofchar {} -nocomplainencoding 0 -strictencoding 0 -translation lf} {-blocking 1 -buffering full -buffersize 4096 -encoding binary -eofchar {} -nocomplainencoding 0 -strictencoding 0 -translation lf -checksum 0} {-blocking 1 -buffering full -buffersize 4096 -encoding binary -eofchar {} -nocomplainencoding 0 -strictencoding 0 -translation lf}} +} -result {{-blocking 1 -buffering full -buffersize 4096 -encoding binary -encodingprofile tcl8 -eofchar {} -translation lf} {-blocking 1 -buffering full -buffersize 4096 -encoding binary -encodingprofile tcl8 -eofchar {} -translation lf -checksum 0} {-blocking 1 -buffering full -buffersize 4096 -encoding binary -encodingprofile tcl8 -eofchar {} -translation lf}} # Input is headers from fetching SPDY draft # Dictionary is that which is proposed _in_ SPDY draft set spdyHeaders "HTTP/1.0 200 OK\r\nContent-Type: text/html; charset=utf-8\r\nX-Robots-Tag: noarchive\r\nLast-Modified: Tue, 05 Jun 2012 02:43:25 GMT\r\nETag: \"1338864205129|#public|0|en|||0\"\r\nExpires: Tue, 05 Jun 2012 16:17:11 GMT\r\nDate: Tue, 05 Jun 2012 16:17:06 GMT\r\nCache-Control: public, max-age=5\r\nX-Content-Type-Options: nosniff\r\nX-XSS-Protection: 1; mode=block\r\nServer: GSE\r\n" -- cgit v0.12 From 8390c51fcaeaa278ec7ec40ec5d31ee187c25208 Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Sat, 25 Feb 2023 07:35:21 +0000 Subject: Fix [1d074b177a]. Failure to read .tclshrc --- unix/tclAppInit.c | 11 ++++++----- win/tclAppInit.c | 7 +++++-- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/unix/tclAppInit.c b/unix/tclAppInit.c index 05d25de..e3d95bc 100644 --- a/unix/tclAppInit.c +++ b/unix/tclAppInit.c @@ -158,15 +158,16 @@ Tcl_AppInit( * is the name of the application. If this line is deleted then no * user-specific startup file will be run under any conditions. */ - #ifdef DJGPP - Tcl_ObjSetVar2(interp, Tcl_NewStringObj("tcl_rcFileName", TCL_INDEX_NONE), NULL, - Tcl_NewStringObj("~/tclsh.rc", TCL_INDEX_NONE), TCL_GLOBAL_ONLY); +#define INITFILENAME "tclshrc.tcl" #else - Tcl_ObjSetVar2(interp, Tcl_NewStringObj("tcl_rcFileName", TCL_INDEX_NONE), NULL, - Tcl_NewStringObj("~/.tclshrc", TCL_INDEX_NONE), TCL_GLOBAL_ONLY); +#define INITFILENAME ".tclshrc" #endif + (void)Tcl_EvalEx(interp, + "set tcl_rcFileName [file tildeexpand ~/" INITFILENAME "]", + -1, + TCL_EVAL_GLOBAL); return TCL_OK; } diff --git a/win/tclAppInit.c b/win/tclAppInit.c index 30127fd..077500a 100644 --- a/win/tclAppInit.c +++ b/win/tclAppInit.c @@ -215,8 +215,11 @@ Tcl_AppInit( * user-specific startup file will be run under any conditions. */ - Tcl_ObjSetVar2(interp, Tcl_NewStringObj("tcl_rcFileName", TCL_INDEX_NONE), NULL, - Tcl_NewStringObj("~/tclshrc.tcl", TCL_INDEX_NONE), TCL_GLOBAL_ONLY); + (void)Tcl_EvalEx(interp, + "set tcl_rcFileName [file tildeexpand ~/tclshrc.tcl]", + -1, + TCL_EVAL_GLOBAL); + return TCL_OK; } -- cgit v0.12 From a826e66f11a2823847cb7788a1d929a9799c95ad Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Sat, 25 Feb 2023 08:58:31 +0000 Subject: Add tests for Bug [46dda6fc29] --- tests/dstring.test | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/tests/dstring.test b/tests/dstring.test index 314cee8..8699a5e 100644 --- a/tests/dstring.test +++ b/tests/dstring.test @@ -211,6 +211,38 @@ test dstring-2.15 {appending list elements} -constraints testdstring -setup { } -cleanup { testdstring free } -result {x #} +test dstring-2.16 {appending list elements - bug [46dda6fc29] segfault} -constraints testdstring -setup { + testdstring free +} -body { + testdstring element "\\\n"; # Will setfault + testdstring get +} -cleanup { + testdstring free +} -result {} +test dstring-2.17 {appending list elements - bug [46dda6fc29] segfault} -constraints testdstring -setup { + testdstring free +} -body { + testdstring element "\\\{"; # Will setfault + testdstring get +} -cleanup { + testdstring free +} -result {} +test dstring-2.18 {appending list elements - bug [46dda6fc29] segfault} -constraints testdstring -setup { + testdstring free +} -body { + testdstring element "\\\}"; # Will setfault + testdstring get +} -cleanup { + testdstring free +} -result {} +test dstring-2.19 {appending list elements - bug [46dda6fc29] segfault} -constraints testdstring -setup { + testdstring free +} -body { + testdstring element "\\\\"; # Will setfault + testdstring get +} -cleanup { + testdstring free +} -result {} test dstring-3.1 {nested sublists} -constraints testdstring -setup { testdstring free -- cgit v0.12 From aa2b48262b02b2f6f23ba7f032f8ea1fb0bddbe3 Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Sat, 25 Feb 2023 09:26:06 +0000 Subject: Fix and tests for [46dda6fc29] --- generic/tclUtil.c | 4 ++-- tests/dstring.test | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/generic/tclUtil.c b/generic/tclUtil.c index e96a564..8f2c16f 100644 --- a/generic/tclUtil.c +++ b/generic/tclUtil.c @@ -1143,13 +1143,13 @@ TclScanElement( */ requireEscape = 1; - length -= (length > 0); + length -= (length+1 > 1); p++; break; } if ((p[1] == '{') || (p[1] == '}') || (p[1] == '\\')) { extra++; /* Escape sequences all one byte longer. */ - length -= (length > 0); + length -= (length+1 > 1); p++; } forbidNone = 1; diff --git a/tests/dstring.test b/tests/dstring.test index 8699a5e..23863d0 100644 --- a/tests/dstring.test +++ b/tests/dstring.test @@ -218,7 +218,7 @@ test dstring-2.16 {appending list elements - bug [46dda6fc29] segfault} -constra testdstring get } -cleanup { testdstring free -} -result {} +} -result \\\\\\n test dstring-2.17 {appending list elements - bug [46dda6fc29] segfault} -constraints testdstring -setup { testdstring free } -body { @@ -226,7 +226,7 @@ test dstring-2.17 {appending list elements - bug [46dda6fc29] segfault} -constra testdstring get } -cleanup { testdstring free -} -result {} +} -result [list [list \{]] test dstring-2.18 {appending list elements - bug [46dda6fc29] segfault} -constraints testdstring -setup { testdstring free } -body { @@ -234,7 +234,7 @@ test dstring-2.18 {appending list elements - bug [46dda6fc29] segfault} -constra testdstring get } -cleanup { testdstring free -} -result {} +} -result [list [list \}]] test dstring-2.19 {appending list elements - bug [46dda6fc29] segfault} -constraints testdstring -setup { testdstring free } -body { @@ -242,7 +242,7 @@ test dstring-2.19 {appending list elements - bug [46dda6fc29] segfault} -constra testdstring get } -cleanup { testdstring free -} -result {} +} -result [list [list \\]] test dstring-3.1 {nested sublists} -constraints testdstring -setup { testdstring free -- cgit v0.12 From 2a5b403768444ddf2d6379ffe3644e9d5b230e19 Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Sat, 25 Feb 2023 16:29:09 +0000 Subject: Experimental fix for [fb368527ae] - length truncation --- generic/tclEncoding.c | 86 +++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 67 insertions(+), 19 deletions(-) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index ce5626f..3a39966 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -1160,8 +1160,8 @@ Tcl_ExternalToUtfDStringEx( char *dst; Tcl_EncodingState state; const Encoding *encodingPtr; - int result, soFar, srcRead, dstWrote, dstChars; - Tcl_Size dstLen; + int result; + Tcl_Size dstLen, soFar; const char *srcStart = src; Tcl_DStringInit(dstPtr); @@ -1179,23 +1179,47 @@ Tcl_ExternalToUtfDStringEx( srcLen = encodingPtr->lengthProc(src); } - flags |= TCL_ENCODING_START | TCL_ENCODING_END; + flags |= TCL_ENCODING_START; if (encodingPtr->toUtfProc == UtfToUtfProc) { flags |= ENCODING_INPUT; } while (1) { - result = encodingPtr->toUtfProc(encodingPtr->clientData, src, srcLen, - flags, &state, dst, dstLen, &srcRead, &dstWrote, &dstChars); - soFar = dst + dstWrote - Tcl_DStringValue(dstPtr); + int srcChunkLen, srcChunkRead; + int dstChunkLen, dstChunkWrote, dstChunkChars; + + if (srcLen > INT_MAX) { + srcChunkLen = INT_MAX; + } else { + srcChunkLen = srcLen; + flags |= TCL_ENCODING_END; /* Last chunk */ + } + dstChunkLen = dstLen > INT_MAX ? INT_MAX : dstLen; + + result = encodingPtr->toUtfProc(encodingPtr->clientData, src, + srcChunkLen, flags, &state, dst, dstChunkLen, + &srcChunkRead, &dstChunkWrote, &dstChunkChars); + soFar = dst + dstChunkWrote - Tcl_DStringValue(dstPtr); - src += srcRead; - if (result != TCL_CONVERT_NOSPACE) { + src += srcChunkRead; + srcLen -= srcChunkRead; + + /* + * Keep looping in two case - + * - our destination buffer did not have enough room + * - we had not passed in all the data and error indicated fragment + * of a multibyte character + * In both cases we have to grow buffer, move the input source pointer + * and loop. Otherwise, return the result we got. + */ + if ((result != TCL_CONVERT_NOSPACE) && + !(result == TCL_CONVERT_MULTIBYTE && (flags & TCL_ENCODING_END))) { Tcl_DStringSetLength(dstPtr, soFar); return (result == TCL_OK) ? TCL_INDEX_NONE : (Tcl_Size)(src - srcStart); } + flags &= ~TCL_ENCODING_START; - srcLen -= srcRead; + if (Tcl_DStringLength(dstPtr) == 0) { Tcl_DStringSetLength(dstPtr, dstLen); } @@ -1398,9 +1422,9 @@ Tcl_UtfToExternalDStringEx( char *dst; Tcl_EncodingState state; const Encoding *encodingPtr; - int result, soFar, srcRead, dstWrote, dstChars; + int result; + Tcl_Size dstLen, soFar; const char *srcStart = src; - Tcl_Size dstLen; Tcl_DStringInit(dstPtr); dst = Tcl_DStringValue(dstPtr); @@ -1416,16 +1440,40 @@ Tcl_UtfToExternalDStringEx( } else if (srcLen == TCL_INDEX_NONE) { srcLen = strlen(src); } - flags |= TCL_ENCODING_START | TCL_ENCODING_END; + flags |= TCL_ENCODING_START; while (1) { + int srcChunkLen, srcChunkRead; + int dstChunkLen, dstChunkWrote, dstChunkChars; + + if (srcLen > INT_MAX) { + srcChunkLen = INT_MAX; + } else { + srcChunkLen = srcLen; + flags |= TCL_ENCODING_END; /* Last chunk */ + } + dstChunkLen = dstLen > INT_MAX ? INT_MAX : dstLen; + result = encodingPtr->fromUtfProc(encodingPtr->clientData, src, - srcLen, flags, &state, dst, dstLen, - &srcRead, &dstWrote, &dstChars); - soFar = dst + dstWrote - Tcl_DStringValue(dstPtr); + srcChunkLen, flags, &state, dst, dstChunkLen, + &srcChunkRead, &dstChunkWrote, &dstChunkChars); + soFar = dst + dstChunkWrote - Tcl_DStringValue(dstPtr); - src += srcRead; - if (result != TCL_CONVERT_NOSPACE) { - int i = soFar + encodingPtr->nullSize - 1; + /* Move past the part processed in this go around */ + src += srcChunkRead; + srcLen -= srcChunkRead; + + /* + * Keep looping in two case - + * - our destination buffer did not have enough room + * - we had not passed in all the data and error indicated fragment + * of a multibyte character + * In both cases we have to grow buffer, move the input source pointer + * and loop. Otherwise, return the result we got. + */ + if ((result != TCL_CONVERT_NOSPACE) && + !(result == TCL_CONVERT_MULTIBYTE && (flags & TCL_ENCODING_END))) { + size_t i = soFar + encodingPtr->nullSize - 1; + /* Loop as DStringSetLength only stores one nul byte at a time */ while (i >= soFar) { Tcl_DStringSetLength(dstPtr, i--); } @@ -1433,7 +1481,7 @@ Tcl_UtfToExternalDStringEx( } flags &= ~TCL_ENCODING_START; - srcLen -= srcRead; + if (Tcl_DStringLength(dstPtr) == 0) { Tcl_DStringSetLength(dstPtr, dstLen); } -- cgit v0.12 From 7b0b2ebd37ef0e7ea2e38a394ee476fed9829a35 Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Sat, 25 Feb 2023 17:23:42 +0000 Subject: Fix large writes to file. Need to break into INT_MAX size chunks. --- generic/tclIO.c | 39 ++++++++++++++++++++++++++++++++------- 1 file changed, 32 insertions(+), 7 deletions(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index 26d0011..82887d9 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -4231,7 +4231,6 @@ Tcl_WriteObj( Channel *chanPtr; ChannelState *statePtr; /* State info for channel */ const char *src; - size_t srcLen = 0; statePtr = ((Channel *) chan)->state; chanPtr = statePtr->topChanPtr; @@ -4240,19 +4239,45 @@ Tcl_WriteObj( return TCL_INDEX_NONE; } if (statePtr->encoding == NULL) { - size_t result; + size_t srcLen; + size_t totalWritten = 0; src = (char *) Tcl_GetByteArrayFromObj(objPtr, &srcLen); + /* TODO - refactor common code below */ if (src == NULL) { Tcl_SetErrno(EILSEQ); - result = TCL_INDEX_NONE; + totalWritten = TCL_INDEX_NONE; } else { - result = WriteBytes(chanPtr, src, srcLen); - } - return result; + int chunkSize = srcLen > INT_MAX ? INT_MAX : srcLen; + int written; + written = WriteBytes(chanPtr, src, chunkSize); + if (written < 0) { + return TCL_INDEX_NONE; + } + totalWritten += written; + srcLen -= chunkSize; + } while (srcLen); + + return totalWritten; } else { + size_t srcLen; + size_t totalWritten = 0; src = Tcl_GetStringFromObj(objPtr, &srcLen); - return WriteChars(chanPtr, src, srcLen); + /* + * Note original code always called WriteChars even if srcLen 0 + * so we will too. + */ + do { + int chunkSize = srcLen > INT_MAX ? INT_MAX : srcLen; + int written; + written = WriteChars(chanPtr, src, chunkSize); + if (written < 0) { + return TCL_INDEX_NONE; + } + totalWritten += written; + srcLen -= chunkSize; + } while (srcLen); + return totalWritten; } } -- cgit v0.12 From 2fb6b99620807fff819ce6c0c7ac60ae4774fc73 Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Sun, 26 Feb 2023 06:26:01 +0000 Subject: Minor refactor, add tests --- generic/tclIO.c | 59 ++++++++++++++++++++++++--------------------------------- tests/io.test | 29 ++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+), 34 deletions(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index 82887d9..ff0e7fb 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -4238,47 +4238,38 @@ Tcl_WriteObj( if (CheckChannelErrors(statePtr, TCL_WRITABLE) != 0) { return TCL_INDEX_NONE; } - if (statePtr->encoding == NULL) { - size_t srcLen; - size_t totalWritten = 0; + size_t srcLen; + if (statePtr->encoding == NULL) { src = (char *) Tcl_GetByteArrayFromObj(objPtr, &srcLen); - /* TODO - refactor common code below */ if (src == NULL) { Tcl_SetErrno(EILSEQ); - totalWritten = TCL_INDEX_NONE; - } else { - int chunkSize = srcLen > INT_MAX ? INT_MAX : srcLen; - int written; - written = WriteBytes(chanPtr, src, chunkSize); - if (written < 0) { - return TCL_INDEX_NONE; - } - totalWritten += written; - srcLen -= chunkSize; - } while (srcLen); - - return totalWritten; + return TCL_INDEX_NONE; + } } else { - size_t srcLen; - size_t totalWritten = 0; src = Tcl_GetStringFromObj(objPtr, &srcLen); - /* - * Note original code always called WriteChars even if srcLen 0 - * so we will too. - */ - do { - int chunkSize = srcLen > INT_MAX ? INT_MAX : srcLen; - int written; - written = WriteChars(chanPtr, src, chunkSize); - if (written < 0) { - return TCL_INDEX_NONE; - } - totalWritten += written; - srcLen -= chunkSize; - } while (srcLen); - return totalWritten; } + + size_t totalWritten = 0; + /* + * Note original code always called WriteChars even if srcLen 0 + * so we will too. + */ + do { + int chunkSize = srcLen > INT_MAX ? INT_MAX : srcLen; + int written; + if (statePtr->encoding == NULL) { + written = WriteBytes(chanPtr, src, chunkSize); + } else { + written = WriteChars(chanPtr, src, chunkSize); + } + if (written < 0) { + return TCL_INDEX_NONE; + } + totalWritten += written; + srcLen -= chunkSize; + } while (srcLen); + return totalWritten; } static void diff --git a/tests/io.test b/tests/io.test index cb1c691..83735c3 100644 --- a/tests/io.test +++ b/tests/io.test @@ -36,6 +36,7 @@ namespace eval ::tcl::test::io { } source [file join [file dirname [info script]] tcltests.tcl] +testConstraint pointerIs64bit [expr {$::tcl_platform(pointerSize) >= 8}] testConstraint testbytestring [llength [info commands testbytestring]] testConstraint testchannel [llength [info commands testchannel]] testConstraint testfevent [llength [info commands testfevent]] @@ -194,6 +195,20 @@ test io-1.9 {Tcl_WriteChars: WriteChars} { set sizes } {19 19 19 19 19} +test io-1.10 {WriteChars: large file (> INT_MAX). Bug 3d01d51bc4} -constraints { + pointerIs64bit +} -setup { + set tmpfile [file join [temporaryDirectory] io-1.10.tmp] +} -cleanup { + file delete $tmpfile +} -body { + set fd [open $tmpfile w] + puts -nonewline $fd [string repeat A 0x80000000] + close $fd + # TODO - Should really read it back in but large reads are not currently working! + file size $tmpfile +} -result 2147483648 + test io-2.1 {WriteBytes} { # loop until all bytes are written @@ -236,6 +251,20 @@ test io-2.4 {WriteBytes: reset sawLF after each buffer} { lappend x [contents $path(test1)] } [list "abcdefg\nhijklmno" "abcdefg\nhijklmnopqrstuvwxyz"] +test io-2.5 {WriteBytes: large file (> INT_MAX). Bug 3d01d51bc4} -constraints { + pointerIs64bit +} -setup { + set tmpfile [file join [temporaryDirectory] io-2.5.tmp] +} -cleanup { + file delete $tmpfile +} -body { + set fd [open $tmpfile wb] + puts -nonewline $fd [string repeat A 0x80000000] + close $fd + # TODO - Should really read it back in but large reads are not currently working! + file size $tmpfile +} -result 2147483648 + test io-3.1 {WriteChars: compatibility with WriteBytes} { # loop until all bytes are written -- cgit v0.12 From deec081e744286a433ade1f6dad4e8fca0a20705 Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Sun, 26 Feb 2023 07:15:06 +0000 Subject: Also fix [90ff9b7f73] - writes of exactly 4294967295 bytes --- generic/tclIOCmd.c | 6 +++--- tests/io.test | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+), 3 deletions(-) diff --git a/generic/tclIOCmd.c b/generic/tclIOCmd.c index 4ce27bb..9493a67 100644 --- a/generic/tclIOCmd.c +++ b/generic/tclIOCmd.c @@ -106,7 +106,7 @@ Tcl_PutsObjCmd( Tcl_Obj *string; /* String to write. */ Tcl_Obj *chanObjPtr = NULL; /* channel object. */ int newline; /* Add a newline at end? */ - int result; /* Result of puts operation. */ + size_t result; /* Result of puts operation. */ int mode; /* Mode in which channel is opened. */ switch (objc) { @@ -163,12 +163,12 @@ Tcl_PutsObjCmd( TclChannelPreserve(chan); result = Tcl_WriteObj(chan, string); - if (result == -1) { + if (result == (size_t) -1) { goto error; } if (newline != 0) { result = Tcl_WriteChars(chan, "\n", 1); - if (result == -1) { + if (result == (size_t) -1) { goto error; } } diff --git a/tests/io.test b/tests/io.test index 83735c3..20b240f 100644 --- a/tests/io.test +++ b/tests/io.test @@ -208,6 +208,33 @@ test io-1.10 {WriteChars: large file (> INT_MAX). Bug 3d01d51bc4} -constraints { # TODO - Should really read it back in but large reads are not currently working! file size $tmpfile } -result 2147483648 +test io-1.11 {WriteChars: large file (> UINT_MAX). Bug 3d01d51bc4} -constraints { + pointerIs64bit +} -setup { + set tmpfile [file join [temporaryDirectory] io-1.11.tmp] +} -cleanup { + file delete $tmpfile +} -body { + set fd [open $tmpfile w] + puts -nonewline $fd [string repeat A 0x100000000] + close $fd + # TODO - Should really read it back in but large reads are not currently working! + file size $tmpfile +} -result 4294967296 +test io-1.12 {WriteChars: large file (== UINT_MAX). Bug 90ff9b7f73} -constraints { + pointerIs64bit +} -setup { + set tmpfile [file join [temporaryDirectory] io-1.12.tmp] +} -cleanup { + file delete $tmpfile +} -body { + set fd [open $tmpfile w] + # *Exactly* UINT_MAX - separate bug from the general large file tests + puts -nonewline $fd [string repeat A 0xffffffff] + close $fd + # TODO - Should really read it back in but large reads are not currently working! + file size $tmpfile +} -result 4294967295 test io-2.1 {WriteBytes} { # loop until all bytes are written @@ -264,6 +291,33 @@ test io-2.5 {WriteBytes: large file (> INT_MAX). Bug 3d01d51bc4} -constraints { # TODO - Should really read it back in but large reads are not currently working! file size $tmpfile } -result 2147483648 +test io-2.6 {WriteBytes: large file (> UINT_MAX). Bug 3d01d51bc4} -constraints { + pointerIs64bit +} -setup { + set tmpfile [file join [temporaryDirectory] io-2.6.tmp] +} -cleanup { + file delete $tmpfile +} -body { + set fd [open $tmpfile wb] + puts -nonewline $fd [string repeat A 0x100000000] + close $fd + # TODO - Should really read it back in but large reads are not currently working! + file size $tmpfile +} -result 4294967296 +test io-2.7 {WriteBytes: large file (== UINT_MAX). Bug 90ff9b7f73} -constraints { + pointerIs64bit +} -setup { + set tmpfile [file join [temporaryDirectory] io-2.7.tmp] +} -cleanup { + file delete $tmpfile +} -body { + set fd [open $tmpfile wb] + # *Exactly* UINT_MAX - separate bug from the general large file tests + puts -nonewline $fd [string repeat A 0xffffffff] + close $fd + # TODO - Should really read it back in but large reads are not currently working! + file size $tmpfile +} -result 4294967295 test io-3.1 {WriteChars: compatibility with WriteBytes} { # loop until all bytes are written -- cgit v0.12 From 79cbdf745a36be243633e74267ba4dd96e62d8a5 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Sun, 26 Feb 2023 12:27:43 +0000 Subject: (size_t) -1 -> TCL_INDEX_NONE --- generic/tclIOCmd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/generic/tclIOCmd.c b/generic/tclIOCmd.c index 9493a67..197ca32 100644 --- a/generic/tclIOCmd.c +++ b/generic/tclIOCmd.c @@ -163,12 +163,12 @@ Tcl_PutsObjCmd( TclChannelPreserve(chan); result = Tcl_WriteObj(chan, string); - if (result == (size_t) -1) { + if (result == TCL_INDEX_NONE) { goto error; } if (newline != 0) { result = Tcl_WriteChars(chan, "\n", 1); - if (result == (size_t) -1) { + if (result == TCL_INDEX_NONE) { goto error; } } -- cgit v0.12 From 5ffda39949b785859a8ab5b9b4977536dde6f9f2 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Sun, 26 Feb 2023 12:56:18 +0000 Subject: Move the "srcLen -= srcChunkRead;" past the "if ((result != TCL_CONVERT_NOSPACE)..." (where it originally was), since this isn't needed if the loop ends anyway. --- generic/tclEncoding.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 3a39966..a6ecc26 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -1187,7 +1187,7 @@ Tcl_ExternalToUtfDStringEx( while (1) { int srcChunkLen, srcChunkRead; int dstChunkLen, dstChunkWrote, dstChunkChars; - + if (srcLen > INT_MAX) { srcChunkLen = INT_MAX; } else { @@ -1202,7 +1202,6 @@ Tcl_ExternalToUtfDStringEx( soFar = dst + dstChunkWrote - Tcl_DStringValue(dstPtr); src += srcChunkRead; - srcLen -= srcChunkRead; /* * Keep looping in two case - @@ -1210,7 +1209,7 @@ Tcl_ExternalToUtfDStringEx( * - we had not passed in all the data and error indicated fragment * of a multibyte character * In both cases we have to grow buffer, move the input source pointer - * and loop. Otherwise, return the result we got. + * and loop. Otherwise, return the result we got. */ if ((result != TCL_CONVERT_NOSPACE) && !(result == TCL_CONVERT_MULTIBYTE && (flags & TCL_ENCODING_END))) { @@ -1219,6 +1218,7 @@ Tcl_ExternalToUtfDStringEx( } flags &= ~TCL_ENCODING_START; + srcLen -= srcChunkRead; if (Tcl_DStringLength(dstPtr) == 0) { Tcl_DStringSetLength(dstPtr, dstLen); @@ -1460,7 +1460,6 @@ Tcl_UtfToExternalDStringEx( /* Move past the part processed in this go around */ src += srcChunkRead; - srcLen -= srcChunkRead; /* * Keep looping in two case - @@ -1468,7 +1467,7 @@ Tcl_UtfToExternalDStringEx( * - we had not passed in all the data and error indicated fragment * of a multibyte character * In both cases we have to grow buffer, move the input source pointer - * and loop. Otherwise, return the result we got. + * and loop. Otherwise, return the result we got. */ if ((result != TCL_CONVERT_NOSPACE) && !(result == TCL_CONVERT_MULTIBYTE && (flags & TCL_ENCODING_END))) { @@ -1481,6 +1480,7 @@ Tcl_UtfToExternalDStringEx( } flags &= ~TCL_ENCODING_START; + srcLen -= srcChunkRead; if (Tcl_DStringLength(dstPtr) == 0) { Tcl_DStringSetLength(dstPtr, dstLen); -- cgit v0.12 From 8fdf20ec2b62f7da18e6acb82772c15d7ee2c596 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Sun, 26 Feb 2023 16:01:29 +0000 Subject: More dstring testcases, extracted from [46dda6fc29] --- tests/dstring.test | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/tests/dstring.test b/tests/dstring.test index 8a24ebe..6cf4bb8 100644 --- a/tests/dstring.test +++ b/tests/dstring.test @@ -211,6 +211,38 @@ test dstring-2.15 {appending list elements} -constraints testdstring -setup { } -cleanup { testdstring free } -result {x #} +test dstring-2.16 {appending list elements - bug [46dda6fc29] segfault} -constraints testdstring -setup { + testdstring free +} -body { + testdstring element "\\\n"; # Will setfault + testdstring get +} -cleanup { + testdstring free +} -result \\\\\\n +test dstring-2.17 {appending list elements - bug [46dda6fc29] segfault} -constraints testdstring -setup { + testdstring free +} -body { + testdstring element "\\\{"; # Will setfault + testdstring get +} -cleanup { + testdstring free +} -result [list [list \{]] +test dstring-2.18 {appending list elements - bug [46dda6fc29] segfault} -constraints testdstring -setup { + testdstring free +} -body { + testdstring element "\\\}"; # Will setfault + testdstring get +} -cleanup { + testdstring free +} -result [list [list \}]] +test dstring-2.19 {appending list elements - bug [46dda6fc29] segfault} -constraints testdstring -setup { + testdstring free +} -body { + testdstring element "\\\\"; # Will setfault + testdstring get +} -cleanup { + testdstring free +} -result [list [list \\]] test dstring-3.1 {nested sublists} -constraints testdstring -setup { testdstring free -- cgit v0.12 From cbda5cb9b212067d1d831ec476057502e3c70531 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Sun, 26 Feb 2023 16:41:06 +0000 Subject: Make Tcl_UtfToExternal()/Tcl_ExternalToUtf() report the error, if srcLen and dstLen are both > INT_MAX and therefore not all characters can be handled by this function. --- generic/tclEncoding.c | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index ce5626f..67e67e9 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -1229,7 +1229,7 @@ Tcl_ExternalToUtf( Tcl_Encoding encoding, /* The encoding for the source string, or NULL * for the default system encoding. */ const char *src, /* Source string in specified encoding. */ - Tcl_Size srcLen, /* Source string length in bytes, or < 0 for + Tcl_Size srcLen, /* Source string length in bytes, or TCL_INDEX_NONE for * encoding-specific string length. */ int flags, /* Conversion control flags. */ Tcl_EncodingState *statePtr,/* Place for conversion routine to store state @@ -1271,7 +1271,15 @@ Tcl_ExternalToUtf( srcLen = encodingPtr->lengthProc(src); } if (statePtr == NULL) { - flags |= TCL_ENCODING_START | TCL_ENCODING_END; + flags |= TCL_ENCODING_START; + if (srcLen > INT_MAX) { + srcLen = INT_MAX; + } else { + flags |= TCL_ENCODING_END; + } + if (dstLen > INT_MAX) { + dstLen = INT_MAX; + } statePtr = &state; } if (srcReadPtr == NULL) { @@ -1467,7 +1475,7 @@ Tcl_UtfToExternal( Tcl_Encoding encoding, /* The encoding for the converted string, or * NULL for the default system encoding. */ const char *src, /* Source string in UTF-8. */ - Tcl_Size srcLen, /* Source string length in bytes, or < 0 for + Tcl_Size srcLen, /* Source string length in bytes, or TCL_INDEX_NONE for * strlen(). */ int flags, /* Conversion control flags. */ Tcl_EncodingState *statePtr,/* Place for conversion routine to store state @@ -1506,7 +1514,15 @@ Tcl_UtfToExternal( srcLen = strlen(src); } if (statePtr == NULL) { - flags |= TCL_ENCODING_START | TCL_ENCODING_END; + flags |= TCL_ENCODING_START; + if (srcLen > INT_MAX) { + srcLen = INT_MAX; + } else { + flags |= TCL_ENCODING_END; + } + if (dstLen > INT_MAX) { + dstLen = INT_MAX; + } statePtr = &state; } if (srcReadPtr == NULL) { -- cgit v0.12 From baf9b5e9bb89e1e13583fb510f6cb134d39126ff Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Sun, 26 Feb 2023 16:54:02 +0000 Subject: Handle statePtr != NULL as well --- generic/tclEncoding.c | 34 ++++++++++++++++------------------ 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 67e67e9..e639d3a 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -1271,17 +1271,16 @@ Tcl_ExternalToUtf( srcLen = encodingPtr->lengthProc(src); } if (statePtr == NULL) { - flags |= TCL_ENCODING_START; - if (srcLen > INT_MAX) { - srcLen = INT_MAX; - } else { - flags |= TCL_ENCODING_END; - } - if (dstLen > INT_MAX) { - dstLen = INT_MAX; - } + flags |= TCL_ENCODING_START | TCL_ENCODING_END; statePtr = &state; } + if (srcLen > INT_MAX) { + srcLen = INT_MAX; + flags &= ~TCL_ENCODING_END; + } + if (dstLen > INT_MAX) { + dstLen = INT_MAX; + } if (srcReadPtr == NULL) { srcReadPtr = &srcRead; } @@ -1514,17 +1513,16 @@ Tcl_UtfToExternal( srcLen = strlen(src); } if (statePtr == NULL) { - flags |= TCL_ENCODING_START; - if (srcLen > INT_MAX) { - srcLen = INT_MAX; - } else { - flags |= TCL_ENCODING_END; - } - if (dstLen > INT_MAX) { - dstLen = INT_MAX; - } + flags |= TCL_ENCODING_START | TCL_ENCODING_END; statePtr = &state; } + if (srcLen > INT_MAX) { + srcLen = INT_MAX; + flags &= ~TCL_ENCODING_END; + } + if (dstLen > INT_MAX) { + dstLen = INT_MAX; + } if (srcReadPtr == NULL) { srcReadPtr = &srcRead; } -- cgit v0.12 From 152d7203ac1b3f7f560995985c15f7527f2ecdc9 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Sun, 26 Feb 2023 17:19:07 +0000 Subject: Handle Tcl_UtfToExternal error in tclZlib.c --- generic/tclZlib.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/generic/tclZlib.c b/generic/tclZlib.c index 5a6dbc4..ea18c16 100644 --- a/generic/tclZlib.c +++ b/generic/tclZlib.c @@ -444,9 +444,13 @@ GenerateHeader( goto error; } else if (value != NULL) { valueStr = Tcl_GetStringFromObj(value, &length); - Tcl_UtfToExternal(NULL, latin1enc, valueStr, length, 0, NULL, + if (Tcl_UtfToExternal(NULL, latin1enc, valueStr, length, 0, NULL, headerPtr->nativeCommentBuf, MAX_COMMENT_LEN-1, NULL, &len, - NULL); + NULL) != TCL_OK) { + result = TCL_ERROR; + Tcl_AppendResult(interp, "Cannot encode comment", NULL); + goto error; + } headerPtr->nativeCommentBuf[len] = '\0'; headerPtr->header.comment = (Bytef *) headerPtr->nativeCommentBuf; if (extraSizePtr != NULL) { @@ -465,8 +469,13 @@ GenerateHeader( goto error; } else if (value != NULL) { valueStr = Tcl_GetStringFromObj(value, &length); - Tcl_UtfToExternal(NULL, latin1enc, valueStr, length, 0, NULL, - headerPtr->nativeFilenameBuf, MAXPATHLEN-1, NULL, &len, NULL); + if (Tcl_UtfToExternal(NULL, latin1enc, valueStr, length, 0, NULL, + headerPtr->nativeCommentBuf, MAX_COMMENT_LEN-1, NULL, &len, + NULL) != TCL_OK) { + result = TCL_ERROR; + Tcl_AppendResult(interp, "Cannot encode filename", NULL); + goto error; + } headerPtr->nativeFilenameBuf[len] = '\0'; headerPtr->header.name = (Bytef *) headerPtr->nativeFilenameBuf; if (extraSizePtr != NULL) { -- cgit v0.12 From e7b8b9d2dd7951ecf0e3cbbcb618244fd7c45ebb Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Sun, 26 Feb 2023 20:12:43 +0000 Subject: Proposed fix for [f9eafc3886]: Error handling in zlib comment/filename. With testcases --- generic/tclZlib.c | 47 ++++++++++++++++++++++++++++++++++++++--------- tests/zlib.test | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 86 insertions(+), 9 deletions(-) diff --git a/generic/tclZlib.c b/generic/tclZlib.c index 63a25fa..cbff7b7 100644 --- a/generic/tclZlib.c +++ b/generic/tclZlib.c @@ -441,10 +441,21 @@ GenerateHeader( if (GetValue(interp, dictObj, "comment", &value) != TCL_OK) { goto error; } else if (value != NULL) { + Tcl_EncodingState state; valueStr = Tcl_GetStringFromObj(value, &len); - Tcl_UtfToExternal(NULL, latin1enc, valueStr, len, 0, NULL, + result = Tcl_UtfToExternal(NULL, latin1enc, valueStr, len, + TCL_ENCODING_START|TCL_ENCODING_END|TCL_ENCODING_STOPONERROR, &state, headerPtr->nativeCommentBuf, MAX_COMMENT_LEN-1, NULL, &len, NULL); + if (result != TCL_OK) { + if (result == TCL_CONVERT_UNKNOWN) { + Tcl_AppendResult(interp, "Comment contains characters > 0xFF", NULL); + } else { + Tcl_AppendResult(interp, "Comment too large for zip", NULL); + } + result = TCL_ERROR; + goto error; + } headerPtr->nativeCommentBuf[len] = '\0'; headerPtr->header.comment = (Bytef *) headerPtr->nativeCommentBuf; if (extraSizePtr != NULL) { @@ -462,9 +473,21 @@ GenerateHeader( if (GetValue(interp, dictObj, "filename", &value) != TCL_OK) { goto error; } else if (value != NULL) { + Tcl_EncodingState state; valueStr = Tcl_GetStringFromObj(value, &len); - Tcl_UtfToExternal(NULL, latin1enc, valueStr, len, 0, NULL, - headerPtr->nativeFilenameBuf, MAXPATHLEN-1, NULL, &len, NULL); + result = Tcl_UtfToExternal(NULL, latin1enc, valueStr, len, + TCL_ENCODING_START|TCL_ENCODING_END|TCL_ENCODING_STOPONERROR, &state, + headerPtr->nativeFilenameBuf, MAXPATHLEN-1, NULL, &len, + NULL); + if (result != TCL_OK) { + if (result == TCL_CONVERT_UNKNOWN) { + Tcl_AppendResult(interp, "Filename contains characters > 0xFF", NULL); + } else { + Tcl_AppendResult(interp, "Filename too large for zip", NULL); + } + result = TCL_ERROR; + goto error; + } headerPtr->nativeFilenameBuf[len] = '\0'; headerPtr->header.name = (Bytef *) headerPtr->nativeFilenameBuf; if (extraSizePtr != NULL) { @@ -1189,7 +1212,8 @@ Tcl_ZlibStreamPut( { ZlibStreamHandle *zshPtr = (ZlibStreamHandle *) zshandle; char *dataTmp = NULL; - int e, size, outSize, toStore; + int e; + int size, outSize, toStore; if (zshPtr->streamEnd) { if (zshPtr->interp) { @@ -1312,7 +1336,8 @@ Tcl_ZlibStreamGet( * may get less! */ { ZlibStreamHandle *zshPtr = (ZlibStreamHandle *) zshandle; - int e, i, listLen, itemLen, dataPos = 0; + int e; + int i, listLen, itemLen, dataPos = 0; Tcl_Obj *itemObj; unsigned char *dataPtr, *itemPtr; int existing; @@ -1561,7 +1586,8 @@ Tcl_ZlibDeflate( int level, Tcl_Obj *gzipHeaderDictObj) { - int wbits = 0, inLen = 0, e = 0, extraSize = 0; + int wbits = 0, e = 0, extraSize = 0; + int inLen = 0; Byte *inData = NULL; z_stream stream; GzipHeader header; @@ -1711,7 +1737,8 @@ Tcl_ZlibInflate( int bufferSize, Tcl_Obj *gzipHeaderDictObj) { - int wbits = 0, inLen = 0, e = 0, newBufferSize; + int wbits = 0, e = 0; + int inLen = 0, newBufferSize; Byte *inData = NULL, *outData = NULL, *newOutData = NULL; z_stream stream; gz_header header, *headerPtr = NULL; @@ -2365,7 +2392,8 @@ ZlibPushSubcmd( const char *const *pushOptions = pushDecompressOptions; enum pushOptions {poDictionary, poHeader, poLevel, poLimit}; Tcl_Obj *headerObj = NULL, *compDictObj = NULL; - int limit = DEFAULT_BUFFER_SIZE, dummy; + int limit = DEFAULT_BUFFER_SIZE; + int dummy; if (objc < 4) { Tcl_WrongNumArgs(interp, 2, objv, "mode channel ?options...?"); @@ -2897,7 +2925,8 @@ ZlibTransformClose( Tcl_Interp *interp) { ZlibChannelData *cd = (ZlibChannelData *)instanceData; - int e, written, result = TCL_OK; + int e, result = TCL_OK; + int written; /* * Delete the support timer. diff --git a/tests/zlib.test b/tests/zlib.test index 7ddf1d7..c3e344c 100644 --- a/tests/zlib.test +++ b/tests/zlib.test @@ -486,6 +486,54 @@ test zlib-8.18 {Bug dd260aaf: fconfigure} -setup { catch {close $inSide} catch {close $outSide} } -result {{one two} {one two}} +test zlib-8.19 {zlib transformation, bug XXXXXXXXXX} -constraints zlib -setup { + set file [makeFile {} test.gz] +} -body { + set f [zlib push gzip [open $file w] -header [list comment [string repeat A 500]]] + puts $f "ok" + close $f + set f [zlib push gunzip [open $file]] + list [gets $f] [dict get [chan configure $f -header] comment] +} -cleanup { + close $f + removeFile $file +} -returnCodes 1 -result {Comment too large for zip} +test zlib-8.20 {zlib transformation, bug XXXXXXXXXX} -constraints zlib -setup { + set file [makeFile {} test.gz] +} -body { + set f [zlib push gzip [open $file w] -header [list filename [string repeat A 5000]]] + puts $f "ok" + close $f + set f [zlib push gunzip [open $file]] + list [gets $f] [dict get [chan configure $f -header] filename] +} -cleanup { + close $f + removeFile $file +} -returnCodes 1 -result {Filename too large for zip} +test zlib-8.21 {zlib transformation, bug XXXXXXXXXX} -constraints zlib -setup { + set file [makeFile {} test.gz] +} -body { + set f [zlib push gzip [open $file w] -header [list comment \u100]] + puts $f "ok" + close $f + set f [zlib push gunzip [open $file]] + list [gets $f] [dict get [chan configure $f -header] comment] +} -cleanup { + close $f + removeFile $file +} -returnCodes 1 -result {Comment contains characters > 0xFF} +test zlib-8.22 {zlib transformation, bug XXXXXXXXXX} -constraints zlib -setup { + set file [makeFile {} test.gz] +} -body { + set f [zlib push gzip [open $file w] -header [list filename \u100]] + puts $f "ok" + close $f + set f [zlib push gunzip [open $file]] + list [gets $f] [dict get [chan configure $f -header] comment] +} -cleanup { + close $f + removeFile $file +} -returnCodes 1 -result {Filename contains characters > 0xFF} test zlib-9.1 "check fcopy with push" -constraints zlib -setup { set sfile [makeFile {} testsrc.gz] -- cgit v0.12 From 6b172a213198a8d51d9b0b7783e0df3adc71bfe6 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Sun, 26 Feb 2023 21:37:15 +0000 Subject: fill in bug ticket-nr --- tests/zlib.test | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/zlib.test b/tests/zlib.test index c3e344c..61e14bb 100644 --- a/tests/zlib.test +++ b/tests/zlib.test @@ -486,7 +486,7 @@ test zlib-8.18 {Bug dd260aaf: fconfigure} -setup { catch {close $inSide} catch {close $outSide} } -result {{one two} {one two}} -test zlib-8.19 {zlib transformation, bug XXXXXXXXXX} -constraints zlib -setup { +test zlib-8.19 {zlib transformation, bug f9eafc3886} -constraints zlib -setup { set file [makeFile {} test.gz] } -body { set f [zlib push gzip [open $file w] -header [list comment [string repeat A 500]]] @@ -498,7 +498,7 @@ test zlib-8.19 {zlib transformation, bug XXXXXXXXXX} -constraints zlib -setup { close $f removeFile $file } -returnCodes 1 -result {Comment too large for zip} -test zlib-8.20 {zlib transformation, bug XXXXXXXXXX} -constraints zlib -setup { +test zlib-8.20 {zlib transformation, bug f9eafc3886} -constraints zlib -setup { set file [makeFile {} test.gz] } -body { set f [zlib push gzip [open $file w] -header [list filename [string repeat A 5000]]] @@ -510,7 +510,7 @@ test zlib-8.20 {zlib transformation, bug XXXXXXXXXX} -constraints zlib -setup { close $f removeFile $file } -returnCodes 1 -result {Filename too large for zip} -test zlib-8.21 {zlib transformation, bug XXXXXXXXXX} -constraints zlib -setup { +test zlib-8.21 {zlib transformation, bug f9eafc3886} -constraints zlib -setup { set file [makeFile {} test.gz] } -body { set f [zlib push gzip [open $file w] -header [list comment \u100]] @@ -522,7 +522,7 @@ test zlib-8.21 {zlib transformation, bug XXXXXXXXXX} -constraints zlib -setup { close $f removeFile $file } -returnCodes 1 -result {Comment contains characters > 0xFF} -test zlib-8.22 {zlib transformation, bug XXXXXXXXXX} -constraints zlib -setup { +test zlib-8.22 {zlib transformation, bug f9eafc3886} -constraints zlib -setup { set file [makeFile {} test.gz] } -body { set f [zlib push gzip [open $file w] -header [list filename \u100]] -- cgit v0.12 From ed360ca42f9908e5d8da5e8f4742b07d0b148b17 Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Mon, 27 Feb 2023 03:13:38 +0000 Subject: Add perf constraint to large io tests to prevent memory faults on systems with limited memory --- tests/io.test | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/io.test b/tests/io.test index 20b240f..c245add 100644 --- a/tests/io.test +++ b/tests/io.test @@ -196,7 +196,7 @@ test io-1.9 {Tcl_WriteChars: WriteChars} { } {19 19 19 19 19} test io-1.10 {WriteChars: large file (> INT_MAX). Bug 3d01d51bc4} -constraints { - pointerIs64bit + pointerIs64bit perf } -setup { set tmpfile [file join [temporaryDirectory] io-1.10.tmp] } -cleanup { @@ -209,7 +209,7 @@ test io-1.10 {WriteChars: large file (> INT_MAX). Bug 3d01d51bc4} -constraints { file size $tmpfile } -result 2147483648 test io-1.11 {WriteChars: large file (> UINT_MAX). Bug 3d01d51bc4} -constraints { - pointerIs64bit + pointerIs64bit perf } -setup { set tmpfile [file join [temporaryDirectory] io-1.11.tmp] } -cleanup { @@ -222,7 +222,7 @@ test io-1.11 {WriteChars: large file (> UINT_MAX). Bug 3d01d51bc4} -constraints file size $tmpfile } -result 4294967296 test io-1.12 {WriteChars: large file (== UINT_MAX). Bug 90ff9b7f73} -constraints { - pointerIs64bit + pointerIs64bit perf } -setup { set tmpfile [file join [temporaryDirectory] io-1.12.tmp] } -cleanup { @@ -279,7 +279,7 @@ test io-2.4 {WriteBytes: reset sawLF after each buffer} { } [list "abcdefg\nhijklmno" "abcdefg\nhijklmnopqrstuvwxyz"] test io-2.5 {WriteBytes: large file (> INT_MAX). Bug 3d01d51bc4} -constraints { - pointerIs64bit + pointerIs64bit perf } -setup { set tmpfile [file join [temporaryDirectory] io-2.5.tmp] } -cleanup { @@ -292,7 +292,7 @@ test io-2.5 {WriteBytes: large file (> INT_MAX). Bug 3d01d51bc4} -constraints { file size $tmpfile } -result 2147483648 test io-2.6 {WriteBytes: large file (> UINT_MAX). Bug 3d01d51bc4} -constraints { - pointerIs64bit + pointerIs64bit perf } -setup { set tmpfile [file join [temporaryDirectory] io-2.6.tmp] } -cleanup { @@ -305,7 +305,7 @@ test io-2.6 {WriteBytes: large file (> UINT_MAX). Bug 3d01d51bc4} -constraints { file size $tmpfile } -result 4294967296 test io-2.7 {WriteBytes: large file (== UINT_MAX). Bug 90ff9b7f73} -constraints { - pointerIs64bit + pointerIs64bit perf } -setup { set tmpfile [file join [temporaryDirectory] io-2.7.tmp] } -cleanup { -- cgit v0.12 From e2d89615d52e47ed3b683498567e058e809aea39 Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Mon, 27 Feb 2023 04:15:07 +0000 Subject: Tests for encoding strings > 4GB (under perf constraint) --- tests/encoding.test | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/tests/encoding.test b/tests/encoding.test index e0e1598..8b14353 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -1046,6 +1046,32 @@ test encoding-29.0 {get encoding nul terminator lengths} -constraints { [testencoding nullength ksc5601] } -result {1 2 4 2 2} +test encoding-30.0 {encoding convertto large strings UINT_MAX} -constraints { + perf +} -body { + # Test to ensure not misinterpreted as -1 + list [string length [set s [string repeat A 0xFFFFFFFF]]] [string equal $s [encoding convertto ascii $s]] +} -result {4294967295 1} + +test encoding-30.1 {encoding convertto large strings > 4GB} -constraints { + perf +} -body { + list [string length [set s [string repeat A 0x100000000]]] [string equal $s [encoding convertto ascii $s]] +} -result {4294967296 1} + +test encoding-30.2 {encoding convertfrom large strings UINT_MAX} -constraints { + perf +} -body { + # Test to ensure not misinterpreted as -1 + list [string length [set s [string repeat A 0xFFFFFFFF]]] [string equal $s [encoding convertfrom ascii $s]] +} -result {4294967295 1} + +test encoding-30.3 {encoding convertfrom large strings > 4GB} -constraints { + perf +} -body { + list [string length [set s [string repeat A 0x100000000]]] [string equal $s [encoding convertfrom ascii $s]] +} -result {4294967296 1} + # cleanup namespace delete ::tcl::test::encoding -- cgit v0.12 From 85bf0db1e84ab483fce7962c151bedeb3f5e0993 Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Mon, 27 Feb 2023 12:31:49 +0000 Subject: Fix crash. int->size_t needs +1 in comparisons. --- generic/tclEncoding.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index a6ecc26..d0756c7 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -1473,7 +1473,7 @@ Tcl_UtfToExternalDStringEx( !(result == TCL_CONVERT_MULTIBYTE && (flags & TCL_ENCODING_END))) { size_t i = soFar + encodingPtr->nullSize - 1; /* Loop as DStringSetLength only stores one nul byte at a time */ - while (i >= soFar) { + while (i+1 >= soFar+1) { Tcl_DStringSetLength(dstPtr, i--); } return (result == TCL_OK) ? TCL_INDEX_NONE : (Tcl_Size)(src - srcStart); -- cgit v0.12 From a1fe72fa4a3bf6c99720ce309d0611a5d941ea93 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Mon, 27 Feb 2023 20:50:33 +0000 Subject: Fix testcases --- tests/zlib.test | 24 ++++-------------------- 1 file changed, 4 insertions(+), 20 deletions(-) diff --git a/tests/zlib.test b/tests/zlib.test index 61e14bb..5312d2b 100644 --- a/tests/zlib.test +++ b/tests/zlib.test @@ -490,48 +490,32 @@ test zlib-8.19 {zlib transformation, bug f9eafc3886} -constraints zlib -setup { set file [makeFile {} test.gz] } -body { set f [zlib push gzip [open $file w] -header [list comment [string repeat A 500]]] - puts $f "ok" - close $f - set f [zlib push gunzip [open $file]] - list [gets $f] [dict get [chan configure $f -header] comment] } -cleanup { - close $f + catch {close $f} removeFile $file } -returnCodes 1 -result {Comment too large for zip} test zlib-8.20 {zlib transformation, bug f9eafc3886} -constraints zlib -setup { set file [makeFile {} test.gz] } -body { set f [zlib push gzip [open $file w] -header [list filename [string repeat A 5000]]] - puts $f "ok" - close $f - set f [zlib push gunzip [open $file]] - list [gets $f] [dict get [chan configure $f -header] filename] } -cleanup { - close $f + catch {close $f} removeFile $file } -returnCodes 1 -result {Filename too large for zip} test zlib-8.21 {zlib transformation, bug f9eafc3886} -constraints zlib -setup { set file [makeFile {} test.gz] } -body { set f [zlib push gzip [open $file w] -header [list comment \u100]] - puts $f "ok" - close $f - set f [zlib push gunzip [open $file]] - list [gets $f] [dict get [chan configure $f -header] comment] } -cleanup { - close $f + catch {close $f} removeFile $file } -returnCodes 1 -result {Comment contains characters > 0xFF} test zlib-8.22 {zlib transformation, bug f9eafc3886} -constraints zlib -setup { set file [makeFile {} test.gz] } -body { set f [zlib push gzip [open $file w] -header [list filename \u100]] - puts $f "ok" - close $f - set f [zlib push gunzip [open $file]] - list [gets $f] [dict get [chan configure $f -header] comment] } -cleanup { - close $f + catch {close $f} removeFile $file } -returnCodes 1 -result {Filename contains characters > 0xFF} -- cgit v0.12 From a947270ff77379afdeda26a33f5f444337b820bc Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Tue, 28 Feb 2023 07:45:27 +0000 Subject: In case of combining TIP #494 (TCL_8_COMPAT) and #628 (building for Tcl 8.7 with 9.0 headers), ignore TCL_8_COMPAT macro. More Tcl_Size usage. --- generic/tcl.h | 14 +++++++------- generic/tclDecls.h | 2 +- generic/tclTest.c | 22 +++++++++++----------- 3 files changed, 19 insertions(+), 19 deletions(-) diff --git a/generic/tcl.h b/generic/tcl.h index fa4da26..1a1452e 100644 --- a/generic/tcl.h +++ b/generic/tcl.h @@ -311,10 +311,10 @@ typedef unsigned TCL_WIDE_INT_TYPE Tcl_WideUInt; #define Tcl_WideAsDouble(val) ((double)((Tcl_WideInt)(val))) #define Tcl_DoubleAsWide(val) ((Tcl_WideInt)((double)(val))) -#if TCL_MAJOR_VERSION > 8 -typedef size_t Tcl_Size; -#else +#if TCL_MAJOR_VERSION < 9 typedef int Tcl_Size; +#else +typedef size_t Tcl_Size; #endif #ifdef _WIN32 @@ -452,17 +452,17 @@ typedef void (Tcl_ThreadCreateProc) (void *clientData); #if TCL_MAJOR_VERSION > 8 typedef struct Tcl_RegExpIndices { - size_t start; /* Character offset of first character in + Tcl_Size start; /* Character offset of first character in * match. */ - size_t end; /* Character offset of first character after + Tcl_Size end; /* Character offset of first character after * the match. */ } Tcl_RegExpIndices; typedef struct Tcl_RegExpInfo { - size_t nsubs; /* Number of subexpressions in the compiled + Tcl_Size nsubs; /* Number of subexpressions in the compiled * expression. */ Tcl_RegExpIndices *matches; /* Array of nsubs match offset pairs. */ - size_t extendStart; /* The offset at which a subsequent match + Tcl_Size extendStart; /* The offset at which a subsequent match * might begin. */ } Tcl_RegExpInfo; #else diff --git a/generic/tclDecls.h b/generic/tclDecls.h index f219500..ed2eb74 100644 --- a/generic/tclDecls.h +++ b/generic/tclDecls.h @@ -4229,7 +4229,7 @@ extern const TclStubs *tclStubsPtr; #define Tcl_GlobalEvalObj(interp, objPtr) \ Tcl_EvalObjEx(interp, objPtr, TCL_EVAL_GLOBAL) -#if defined(TCL_8_COMPAT) && !defined(BUILD_tcl) +#if defined(TCL_8_COMPAT) && !defined(BUILD_tcl) && TCL_MAJOR_VERSION > 8 # ifdef USE_TCL_STUBS # undef Tcl_Gets # undef Tcl_GetsObj diff --git a/generic/tclTest.c b/generic/tclTest.c index 652c5aa..b6c7f77 100644 --- a/generic/tclTest.c +++ b/generic/tclTest.c @@ -4105,7 +4105,7 @@ TestregexpObjCmd( Tcl_Obj *const objv[]) /* Argument objects. */ { int i, indices, stringLength, match, about; - size_t ii; + Tcl_Size ii; int hasxflags, cflags, eflags; Tcl_RegExp regExpr; const char *string; @@ -4217,7 +4217,7 @@ TestregexpObjCmd( if (objc > 2 && (cflags®_EXPECT) && indices) { const char *varName; const char *value; - size_t start, end; + Tcl_Size start, end; char resinfo[TCL_INTEGER_SPACE * 2]; varName = Tcl_GetString(objv[2]); @@ -4257,11 +4257,11 @@ TestregexpObjCmd( Tcl_RegExpGetInfo(regExpr, &info); for (i = 0; i < objc; i++) { - size_t start, end; + Tcl_Size start, end; Tcl_Obj *newPtr, *varPtr, *valuePtr; varPtr = objv[i]; - ii = ((cflags®_EXPECT) && i == objc-1) ? TCL_INDEX_NONE : (size_t)i; + ii = ((cflags®_EXPECT) && i == objc-1) ? TCL_INDEX_NONE : (Tcl_Size)i; if (indices) { Tcl_Obj *objs[2]; @@ -6476,10 +6476,10 @@ static int TestWrongNumArgsObjCmd( TCL_UNUSED(void *), Tcl_Interp *interp, /* Current interpreter. */ - size_t objc, /* Number of arguments. */ + Tcl_Size objc, /* Number of arguments. */ Tcl_Obj *const objv[]) /* Argument objects. */ { - size_t i, length; + Tcl_Size i, length; const char *msg; if (objc + 1 < 4) { @@ -7187,7 +7187,7 @@ TestUtfPrevCmd( int objc, Tcl_Obj *const objv[]) { - size_t numBytes, offset; + Tcl_Size numBytes, offset; char *bytes; const char *result; @@ -7228,7 +7228,7 @@ TestNumUtfCharsCmd( Tcl_Obj *const objv[]) { if (objc > 1) { - size_t numBytes, len, limit = TCL_INDEX_NONE; + Tcl_Size numBytes, len, limit = TCL_INDEX_NONE; const char *bytes = Tcl_GetStringFromObj(objv[1], &numBytes); if (objc > 2) { @@ -7296,7 +7296,7 @@ TestGetIntForIndexCmd( int objc, Tcl_Obj *const objv[]) { - size_t result; + Tcl_Size result; Tcl_WideInt endvalue; if (objc != 3) { @@ -7415,7 +7415,7 @@ TestHashSystemHashCmd( Tcl_SetHashValue(hPtr, INT2PTR(i+42)); } - if (hash.numEntries != (size_t)limit) { + if (hash.numEntries != (Tcl_Size)limit) { Tcl_AppendResult(interp, "unexpected maximal size", NULL); Tcl_DeleteHashTable(&hash); return TCL_ERROR; @@ -8175,7 +8175,7 @@ static int InterpCompiledVarResolver( TCL_UNUSED(Tcl_Interp *), const char *name, - TCL_UNUSED(size_t) /*length*/, + TCL_UNUSED(Tcl_Size) /*length*/, TCL_UNUSED(Tcl_Namespace *), Tcl_ResolvedVarInfo **rPtr) { -- cgit v0.12 From e743d3e48700a8b562d4a7e3893c856532ca107c Mon Sep 17 00:00:00 2001 From: pooryorick Date: Tue, 28 Feb 2023 09:57:22 +0000 Subject: Fix formatting issue in Tcl.n --- doc/Tcl.n | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doc/Tcl.n b/doc/Tcl.n index 8e0b342..0f784af 100644 --- a/doc/Tcl.n +++ b/doc/Tcl.n @@ -156,6 +156,8 @@ special processing. The following table lists the backslash sequences that are handled specially, along with the value that replaces each sequence. .RS +.RS +.RS .TP 7 \e\fBa\fR Audible alert (bell) (Unicode U+000007). @@ -222,6 +224,7 @@ inserted, in the range U+000000\(enU+10FFFF. The parser will stop just before this range overflows, or when the maximum of eight digits is reached. The upper bits of the Unicode character will be 0. .RE +.RE .PP Backslash substitution is not performed on words enclosed in braces, except for backslash-newline as described above. -- cgit v0.12 From 8b417f0e1ec2ff11fe856ea3d521356489c8dae0 Mon Sep 17 00:00:00 2001 From: pooryorick Date: Tue, 28 Feb 2023 09:59:29 +0000 Subject: Fix formatting issue in Tcl.n --- doc/Tcl.n | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doc/Tcl.n b/doc/Tcl.n index 8e0b342..0f784af 100644 --- a/doc/Tcl.n +++ b/doc/Tcl.n @@ -156,6 +156,8 @@ special processing. The following table lists the backslash sequences that are handled specially, along with the value that replaces each sequence. .RS +.RS +.RS .TP 7 \e\fBa\fR Audible alert (bell) (Unicode U+000007). @@ -222,6 +224,7 @@ inserted, in the range U+000000\(enU+10FFFF. The parser will stop just before this range overflows, or when the maximum of eight digits is reached. The upper bits of the Unicode character will be 0. .RE +.RE .PP Backslash substitution is not performed on words enclosed in braces, except for backslash-newline as described above. -- cgit v0.12 From 7d6cf09e029257c1c0656f2cd9253a4436e6a27c Mon Sep 17 00:00:00 2001 From: pooryorick Date: Tue, 28 Feb 2023 10:04:49 +0000 Subject: Make the descriptions in doc/Tcl.n more concise and intuitive. --- doc/Tcl.n | 315 ++++++++++++++++++++++++-------------------------------------- 1 file changed, 121 insertions(+), 194 deletions(-) diff --git a/doc/Tcl.n b/doc/Tcl.n index 0f784af..d13f3ea 100644 --- a/doc/Tcl.n +++ b/doc/Tcl.n @@ -1,6 +1,7 @@ '\" '\" Copyright (c) 1993 The Regents of the University of California. '\" Copyright (c) 1994-1996 Sun Microsystems, Inc. +'\" Copyright (c) 2023 Nathan Coulter '\" '\" See the file "license.terms" for information on usage and redistribution '\" of this file, and for a DISCLAIMER OF ALL WARRANTIES. @@ -16,178 +17,152 @@ Summary of Tcl language syntax. .SH DESCRIPTION .PP The following rules define the syntax and semantics of the Tcl language: -.IP "[1] \fBCommands.\fR" -A Tcl script is a string containing one or more commands. -Semi-colons and newlines are command separators unless quoted as -described below. -Close brackets are command terminators during command substitution -(see below) unless quoted. -.IP "[2] \fBEvaluation.\fR" -A command is evaluated in two steps. -First, the Tcl interpreter breaks the command into \fIwords\fR -and performs substitutions as described below. -These substitutions are performed in the same way for all -commands. -Secondly, the first word is used to locate a routine to -carry out the command, and the remaining words of the command are -passed to that routine. -The routine is free to interpret each of its words -in any way it likes, such as an integer, variable name, list, -or Tcl script. -Different commands interpret their words differently. -.IP "[3] \fBWords.\fR" -Words of a command are separated by white space (except for -newlines, which are command separators). -.IP "[4] \fBDouble quotes.\fR" -If the first character of a word is double-quote +. +.IP "[1] \fBScript.\fR" +A script is composed of zero or more commands delimited by semi-colons or +newlines. +.IP "[2] \fBCommand.\fR" +A command is composed of zero or more words delimited by whitespace. The +replacement for a substitution is included verbatim in the word. For example, a +space in the replacement is included in the word rather than becoming a +delimiter, and \fI\\\\\fR becomes a single backslash in the word. Each word is +processed from left to right and each substitution is performed as soon as it +is complete. +For example, the command +.RS +.PP +.CS +set y [set x 0][incr x][incr x] +.CE +.PP +is composed of three words, and sets the value of \fIy\fR to \fI012\fR. +.PP +If hash +.PQ # +is the first character of what would otherwise be the first word of a command, +all characters up to the next newline are ignored. +.RE +. +.IP "[3] \fBBraced word.\fR" +If a word is enclosed in braces +.PQ { +and +.PQ } "" +, the braces are removed and the enclosed characters become the word. No +substitutions are performed. Nested pairs of braces may occur within the word. +A brace preceded by an odd number of backslashes is not considered part of a +pair, and neither brace nor the backslashes are removed from the word. +. +.IP "[4] \fBQuoted word.\fR" +If a word is enclosed in double quotes .PQ \N'34' -then the word is terminated by the next double-quote character. -If semi-colons, close brackets, or white space characters -(including newlines) appear between the quotes then they are treated -as ordinary characters and included in the word. -Command substitution, variable substitution, and backslash substitution -are performed on the characters between the quotes as described below. -The double-quotes are not retained as part of the word. -.IP "[5] \fBArgument expansion.\fR" -If a word starts with the string -.QW {*} -followed by a non-whitespace character, then the leading +, the double quotes are removed and the enclosed characters become the word. +Substitutions are performed. +. +.IP "[5] \fBList.\fR" +A list has the form of a single command. Newline is whitespace, and semicolon +has no special interpretation. There is no script evaluation so there is no +argument expansion, variable substitution, or command substitution: Dollar-sign +and open bracket have no special interpretation, and what would be argument +expansion in a script is invalid in a list. +. +.IP "[6] \fBArgument expansion.\fR" +If .QW {*} -is removed and the rest of the word is parsed and substituted as any other -word. After substitution, the word is parsed as a list (without command or -variable substitutions; backslash substitutions are performed as is normal for -a list and individual internal words may be surrounded by either braces or -double-quote characters), and its words are added to the command being -substituted. For instance, -.QW "cmd a {*}{b [c]} d {*}{$e f {g h}}" +prefixes a word, it is removed. After any remaining enclosing braces or quotes +are processed and applicable substitutions performed, the word, which must +be a list, is removed from the command, and in its place each word in the +list becomes an additional word in the command. For example, +.CS +cmd a {*}{b [c]} d {*}{$e f {g h}} +.CE is equivalent to -.QW "cmd a b {[c]} d {$e} f {g h}" . -.IP "[6] \fBBraces.\fR" -If the first character of a word is an open brace -.PQ { -and rule [5] does not apply, then -the word is terminated by the matching close brace -.PQ } "" . -Braces nest within the word: for each additional open -brace there must be an additional close brace (however, -if an open brace or close brace within the word is -quoted with a backslash then it is not counted in locating the -matching close brace). -No substitutions are performed on the characters between the -braces except for backslash-newline substitutions described -below, nor do semi-colons, newlines, close brackets, -or white space receive any special interpretation. -The word will consist of exactly the characters between the -outer braces, not including the braces themselves. -.IP "[7] \fBCommand substitution.\fR" -If a word contains an open bracket +.CS +cmd a b {[c]} d {$e} f {g h} . +.CE +. +.IP "[7] \fBEvaluation.\fR" +To evaluate a script, an interpreter evaluates each successive command. The +first word identifies a procedure, and the remaining words are passed to that +procedure for further evaluation. The procedure interprets each argument in +its own way, e.g. as an integer, variable name, list, mathematical expression, +script, or in some other arbitrary way. The result of the last command is the +result of the script. +. +.IP "[8] \fBCommand substitution.\fR" +Each pair of brackets .PQ [ -then Tcl performs \fIcommand substitution\fR. -To do this it invokes the Tcl interpreter recursively to process -the characters following the open bracket as a Tcl script. -The script may contain any number of commands and must be terminated -by a close bracket -.PQ ] "" . -The result of the script (i.e. the result of its last command) is -substituted into the word in place of the brackets and all of the -characters between them. -There may be any number of command substitutions in a single word. -Command substitution is not performed on words enclosed in braces. -.IP "[8] \fBVariable substitution.\fR" -If a word contains a dollar-sign +and +.PQ ] "" +encloses a script and is replaced by the result of that script. +.IP "[9] \fBVariable substitution.\fR" +Each of the following forms begins with dollar sign .PQ $ -followed by one of the forms -described below, then Tcl performs \fIvariable -substitution\fR: the dollar-sign and the following characters are -replaced in the word by the value of a variable. -Variable substitution may take any of the following forms: +and is replaced by the value of the identified variable. \fIname\fR names the +variable and is composed of ASCII letters (\fBA\fR\(en\fBZ\fR and +\fBa\fR\(en\fBz\fR), digits (\fB0\fR\(en\fB9\fR), underscores, or namespace +delimiters (two or more colons). \fIindex\fR is the name of an individual +variable within an array variable, and may be empty. .RS .TP 15 \fB$\fIname\fR . -\fIName\fR is the name of a scalar variable; the name is a sequence -of one or more characters that are a letter, digit, underscore, -or namespace separators (two or more colons). -Letters and digits are \fIonly\fR the standard ASCII ones (\fB0\fR\(en\fB9\fR, -\fBA\fR\(en\fBZ\fR and \fBa\fR\(en\fBz\fR). +\fIname\fR may not be empty. + .TP 15 \fB$\fIname\fB(\fIindex\fB)\fR . -\fIName\fR gives the name of an array variable and \fIindex\fR gives -the name of an element within that array. -\fIName\fR must contain only letters, digits, underscores, and -namespace separators, and may be an empty string. -Letters and digits are \fIonly\fR the standard ASCII ones (\fB0\fR\(en\fB9\fR, -\fBA\fR\(en\fBZ\fR and \fBa\fR\(en\fBz\fR). -Command substitutions, variable substitutions, and backslash -substitutions are performed on the characters of \fIindex\fR. +\fIname\fR may be empty. Substitutions are performed on \fIindex\fR. .TP 15 \fB${\fIname\fB}\fR +\fIname\fR may be empty. +.TP 15 +\fB${\fIname(index)\fB}\fR . -\fIName\fR is the name of a scalar variable or array element. It may contain -any characters whatsoever except for close braces. It indicates an array -element if \fIname\fR is in the form -.QW \fIarrayName\fB(\fIindex\fB)\fR -where \fIarrayName\fR does not contain any open parenthesis characters, -.QW \fB(\fR , -or close brace characters, -.QW \fB}\fR , -and \fIindex\fR can be any sequence of characters except for close brace -characters. No further -substitutions are performed during the parsing of \fIname\fR. -.PP -There may be any number of variable substitutions in a single word. -Variable substitution is not performed on words enclosed in braces. -.PP -Note that variables may contain character sequences other than those listed -above, but in that case other mechanisms must be used to access them (e.g., -via the \fBset\fR command's single-argument form). +\fIname\fR may be empty. No substitutions are performed. .RE -.IP "[9] \fBBackslash substitution.\fR" -If a backslash +Variables that are not accessible through one of the forms above may be +accessed through other mechanisms, e.g. the \fBset\fR command. +.IP "[10] \fBBackslash substitution.\fR" +Each backslash .PQ \e -appears within a word then \fIbackslash substitution\fR occurs. -In all cases but those described below the backslash is dropped and -the following character is treated as an ordinary -character and included in the word. -This allows characters such as double quotes, close brackets, -and dollar signs to be included in words without triggering -special processing. -The following table lists the backslash sequences that are -handled specially, along with the value that replaces each sequence. +that is not part of one of the forms listed below is removed, and the next +character is included in the word verbatim, which allows the inclusion of +characters that would normally be interpreted, namely whitespace, braces, +brackets, double quote, dollar sign, and backslash. The following sequences +are replaced as described: .RS .RS .RS .TP 7 \e\fBa\fR -Audible alert (bell) (Unicode U+000007). +Audible alert (bell) (U+7). .TP 7 \e\fBb\fR -Backspace (Unicode U+000008). +Backspace (U+8). .TP 7 \e\fBf\fR -Form feed (Unicode U+00000C). +Form feed (U+C). .TP 7 \e\fBn\fR -Newline (Unicode U+00000A). +Newline (U+A). .TP 7 \e\fBr\fR -Carriage-return (Unicode U+00000D). +Carriage-return (U+D). .TP 7 \e\fBt\fR -Tab (Unicode U+000009). +Tab (U+9). .TP 7 \e\fBv\fR -Vertical tab (Unicode U+00000B). +Vertical tab (U+B). .TP 7 \e\fB\fIwhiteSpace\fR . -A single space character replaces the backslash, newline, and all spaces -and tabs after the newline. This backslash sequence is unique in that it -is replaced in a separate pre-pass before the command is actually parsed. -This means that it will be replaced even when it occurs between braces, -and the resulting space will be treated as a word separator if it is not -in braces or quotes. +Newline preceded by an odd number of backslashes, along with the consecutive +spaces and tabs that immediately follow it, is replaced by a single space. +Because this happens before the command is split into words, it occurs even +within braced words, and if the resulting space may subsequently be treated as +a word delimiter. .TP 7 \e\e Backslash @@ -195,78 +170,30 @@ Backslash .TP 7 \e\fIooo\fR . -The digits \fIooo\fR (one, two, or three of them) give a eight-bit octal -value for the Unicode character that will be inserted, in the range -\fI000\fR\(en\fI377\fR (i.e., the range U+000000\(enU+0000FF). -The parser will stop just before this range overflows, or when -the maximum of three digits is reached. The upper bits of the Unicode -character will be 0. +Up to three octal digits form an eight-bit value for a Unicode character in the +range \fI0\fR\(en\fI377\fR, i.e. U+0\(enU+FF. Only the digits that result in a +number in this range are consumed. .TP 7 \e\fBx\fIhh\fR . -The hexadecimal digits \fIhh\fR (one or two of them) give an eight-bit -hexadecimal value for the Unicode character that will be inserted. The upper -bits of the Unicode character will be 0 (i.e., the character will be in the -range U+000000\(enU+0000FF). +Up to two hexadecimal digits form an eight-bit value for a Unicode character in +the range \fI0\fR\(en\fIFF\fR. .TP 7 \e\fBu\fIhhhh\fR . -The hexadecimal digits \fIhhhh\fR (one, two, three, or four of them) give a -sixteen-bit hexadecimal value for the Unicode character that will be -inserted. The upper bits of the Unicode character will be 0 (i.e., the -character will be in the range U+000000\(enU+00FFFF). +Up to four hexadecimal digits form a 16-bit value for a Unicode character in +the range \fI0\fR\(en\fIFFFF\fR. .TP 7 \e\fBU\fIhhhhhhhh\fR . -The hexadecimal digits \fIhhhhhhhh\fR (one up to eight of them) give a -twenty-one-bit hexadecimal value for the Unicode character that will be -inserted, in the range U+000000\(enU+10FFFF. The parser will stop just -before this range overflows, or when the maximum of eight digits -is reached. The upper bits of the Unicode character will be 0. -.RE +Up to eight hexadecimal digits form a 21-bit value for a Unicode character in +the range \fI0\fR\(en\fI10FFFF\fR. Only the digits that result in a number in +this range are consumed. .RE -.PP -Backslash substitution is not performed on words enclosed in braces, -except for backslash-newline as described above. .RE -.IP "[10] \fBComments.\fR" -If a hash character -.PQ # -appears at a point where Tcl is -expecting the first character of the first word of a command, -then the hash character and the characters that follow it, up -through the next newline, are treated as a comment and ignored. -The comment character only has significance when it appears -at the beginning of a command. -.IP "[11] \fBOrder of substitution.\fR" -Each character is processed exactly once by the Tcl interpreter -as part of creating the words of a command. -For example, if variable substitution occurs then no further -substitutions are performed on the value of the variable; the -value is inserted into the word verbatim. -If command substitution occurs then the nested command is -processed entirely by the recursive call to the Tcl interpreter; -no substitutions are performed before making the recursive -call and no additional substitutions are performed on the result -of the nested script. -.RS .PP -Substitutions take place from left to right, and each substitution is -evaluated completely before attempting to evaluate the next. Thus, a -sequence like -.PP -.CS -set y [set x 0][incr x][incr x] -.CE -.PP -will always set the variable \fIy\fR to the value, \fI012\fR. .RE -.IP "[12] \fBSubstitution and word boundaries.\fR" -Substitutions do not affect the word boundaries of a command, -except for argument expansion as specified in rule [5]. -For example, during variable substitution the entire value of -the variable becomes part of a single word, even if the variable's -value contains spaces. +. .SH KEYWORDS backslash, command, comment, script, substitution, variable '\" Local Variables: -- cgit v0.12 From 57d266423a5638cbedc01dc406d5af47a146ca20 Mon Sep 17 00:00:00 2001 From: pooryorick Date: Tue, 28 Feb 2023 10:13:25 +0000 Subject: Make the descriptions in doc/Tcl.n more concise and intuitive. --- doc/Tcl.n | 315 ++++++++++++++++++++++++-------------------------------------- 1 file changed, 121 insertions(+), 194 deletions(-) diff --git a/doc/Tcl.n b/doc/Tcl.n index 0f784af..d13f3ea 100644 --- a/doc/Tcl.n +++ b/doc/Tcl.n @@ -1,6 +1,7 @@ '\" '\" Copyright (c) 1993 The Regents of the University of California. '\" Copyright (c) 1994-1996 Sun Microsystems, Inc. +'\" Copyright (c) 2023 Nathan Coulter '\" '\" See the file "license.terms" for information on usage and redistribution '\" of this file, and for a DISCLAIMER OF ALL WARRANTIES. @@ -16,178 +17,152 @@ Summary of Tcl language syntax. .SH DESCRIPTION .PP The following rules define the syntax and semantics of the Tcl language: -.IP "[1] \fBCommands.\fR" -A Tcl script is a string containing one or more commands. -Semi-colons and newlines are command separators unless quoted as -described below. -Close brackets are command terminators during command substitution -(see below) unless quoted. -.IP "[2] \fBEvaluation.\fR" -A command is evaluated in two steps. -First, the Tcl interpreter breaks the command into \fIwords\fR -and performs substitutions as described below. -These substitutions are performed in the same way for all -commands. -Secondly, the first word is used to locate a routine to -carry out the command, and the remaining words of the command are -passed to that routine. -The routine is free to interpret each of its words -in any way it likes, such as an integer, variable name, list, -or Tcl script. -Different commands interpret their words differently. -.IP "[3] \fBWords.\fR" -Words of a command are separated by white space (except for -newlines, which are command separators). -.IP "[4] \fBDouble quotes.\fR" -If the first character of a word is double-quote +. +.IP "[1] \fBScript.\fR" +A script is composed of zero or more commands delimited by semi-colons or +newlines. +.IP "[2] \fBCommand.\fR" +A command is composed of zero or more words delimited by whitespace. The +replacement for a substitution is included verbatim in the word. For example, a +space in the replacement is included in the word rather than becoming a +delimiter, and \fI\\\\\fR becomes a single backslash in the word. Each word is +processed from left to right and each substitution is performed as soon as it +is complete. +For example, the command +.RS +.PP +.CS +set y [set x 0][incr x][incr x] +.CE +.PP +is composed of three words, and sets the value of \fIy\fR to \fI012\fR. +.PP +If hash +.PQ # +is the first character of what would otherwise be the first word of a command, +all characters up to the next newline are ignored. +.RE +. +.IP "[3] \fBBraced word.\fR" +If a word is enclosed in braces +.PQ { +and +.PQ } "" +, the braces are removed and the enclosed characters become the word. No +substitutions are performed. Nested pairs of braces may occur within the word. +A brace preceded by an odd number of backslashes is not considered part of a +pair, and neither brace nor the backslashes are removed from the word. +. +.IP "[4] \fBQuoted word.\fR" +If a word is enclosed in double quotes .PQ \N'34' -then the word is terminated by the next double-quote character. -If semi-colons, close brackets, or white space characters -(including newlines) appear between the quotes then they are treated -as ordinary characters and included in the word. -Command substitution, variable substitution, and backslash substitution -are performed on the characters between the quotes as described below. -The double-quotes are not retained as part of the word. -.IP "[5] \fBArgument expansion.\fR" -If a word starts with the string -.QW {*} -followed by a non-whitespace character, then the leading +, the double quotes are removed and the enclosed characters become the word. +Substitutions are performed. +. +.IP "[5] \fBList.\fR" +A list has the form of a single command. Newline is whitespace, and semicolon +has no special interpretation. There is no script evaluation so there is no +argument expansion, variable substitution, or command substitution: Dollar-sign +and open bracket have no special interpretation, and what would be argument +expansion in a script is invalid in a list. +. +.IP "[6] \fBArgument expansion.\fR" +If .QW {*} -is removed and the rest of the word is parsed and substituted as any other -word. After substitution, the word is parsed as a list (without command or -variable substitutions; backslash substitutions are performed as is normal for -a list and individual internal words may be surrounded by either braces or -double-quote characters), and its words are added to the command being -substituted. For instance, -.QW "cmd a {*}{b [c]} d {*}{$e f {g h}}" +prefixes a word, it is removed. After any remaining enclosing braces or quotes +are processed and applicable substitutions performed, the word, which must +be a list, is removed from the command, and in its place each word in the +list becomes an additional word in the command. For example, +.CS +cmd a {*}{b [c]} d {*}{$e f {g h}} +.CE is equivalent to -.QW "cmd a b {[c]} d {$e} f {g h}" . -.IP "[6] \fBBraces.\fR" -If the first character of a word is an open brace -.PQ { -and rule [5] does not apply, then -the word is terminated by the matching close brace -.PQ } "" . -Braces nest within the word: for each additional open -brace there must be an additional close brace (however, -if an open brace or close brace within the word is -quoted with a backslash then it is not counted in locating the -matching close brace). -No substitutions are performed on the characters between the -braces except for backslash-newline substitutions described -below, nor do semi-colons, newlines, close brackets, -or white space receive any special interpretation. -The word will consist of exactly the characters between the -outer braces, not including the braces themselves. -.IP "[7] \fBCommand substitution.\fR" -If a word contains an open bracket +.CS +cmd a b {[c]} d {$e} f {g h} . +.CE +. +.IP "[7] \fBEvaluation.\fR" +To evaluate a script, an interpreter evaluates each successive command. The +first word identifies a procedure, and the remaining words are passed to that +procedure for further evaluation. The procedure interprets each argument in +its own way, e.g. as an integer, variable name, list, mathematical expression, +script, or in some other arbitrary way. The result of the last command is the +result of the script. +. +.IP "[8] \fBCommand substitution.\fR" +Each pair of brackets .PQ [ -then Tcl performs \fIcommand substitution\fR. -To do this it invokes the Tcl interpreter recursively to process -the characters following the open bracket as a Tcl script. -The script may contain any number of commands and must be terminated -by a close bracket -.PQ ] "" . -The result of the script (i.e. the result of its last command) is -substituted into the word in place of the brackets and all of the -characters between them. -There may be any number of command substitutions in a single word. -Command substitution is not performed on words enclosed in braces. -.IP "[8] \fBVariable substitution.\fR" -If a word contains a dollar-sign +and +.PQ ] "" +encloses a script and is replaced by the result of that script. +.IP "[9] \fBVariable substitution.\fR" +Each of the following forms begins with dollar sign .PQ $ -followed by one of the forms -described below, then Tcl performs \fIvariable -substitution\fR: the dollar-sign and the following characters are -replaced in the word by the value of a variable. -Variable substitution may take any of the following forms: +and is replaced by the value of the identified variable. \fIname\fR names the +variable and is composed of ASCII letters (\fBA\fR\(en\fBZ\fR and +\fBa\fR\(en\fBz\fR), digits (\fB0\fR\(en\fB9\fR), underscores, or namespace +delimiters (two or more colons). \fIindex\fR is the name of an individual +variable within an array variable, and may be empty. .RS .TP 15 \fB$\fIname\fR . -\fIName\fR is the name of a scalar variable; the name is a sequence -of one or more characters that are a letter, digit, underscore, -or namespace separators (two or more colons). -Letters and digits are \fIonly\fR the standard ASCII ones (\fB0\fR\(en\fB9\fR, -\fBA\fR\(en\fBZ\fR and \fBa\fR\(en\fBz\fR). +\fIname\fR may not be empty. + .TP 15 \fB$\fIname\fB(\fIindex\fB)\fR . -\fIName\fR gives the name of an array variable and \fIindex\fR gives -the name of an element within that array. -\fIName\fR must contain only letters, digits, underscores, and -namespace separators, and may be an empty string. -Letters and digits are \fIonly\fR the standard ASCII ones (\fB0\fR\(en\fB9\fR, -\fBA\fR\(en\fBZ\fR and \fBa\fR\(en\fBz\fR). -Command substitutions, variable substitutions, and backslash -substitutions are performed on the characters of \fIindex\fR. +\fIname\fR may be empty. Substitutions are performed on \fIindex\fR. .TP 15 \fB${\fIname\fB}\fR +\fIname\fR may be empty. +.TP 15 +\fB${\fIname(index)\fB}\fR . -\fIName\fR is the name of a scalar variable or array element. It may contain -any characters whatsoever except for close braces. It indicates an array -element if \fIname\fR is in the form -.QW \fIarrayName\fB(\fIindex\fB)\fR -where \fIarrayName\fR does not contain any open parenthesis characters, -.QW \fB(\fR , -or close brace characters, -.QW \fB}\fR , -and \fIindex\fR can be any sequence of characters except for close brace -characters. No further -substitutions are performed during the parsing of \fIname\fR. -.PP -There may be any number of variable substitutions in a single word. -Variable substitution is not performed on words enclosed in braces. -.PP -Note that variables may contain character sequences other than those listed -above, but in that case other mechanisms must be used to access them (e.g., -via the \fBset\fR command's single-argument form). +\fIname\fR may be empty. No substitutions are performed. .RE -.IP "[9] \fBBackslash substitution.\fR" -If a backslash +Variables that are not accessible through one of the forms above may be +accessed through other mechanisms, e.g. the \fBset\fR command. +.IP "[10] \fBBackslash substitution.\fR" +Each backslash .PQ \e -appears within a word then \fIbackslash substitution\fR occurs. -In all cases but those described below the backslash is dropped and -the following character is treated as an ordinary -character and included in the word. -This allows characters such as double quotes, close brackets, -and dollar signs to be included in words without triggering -special processing. -The following table lists the backslash sequences that are -handled specially, along with the value that replaces each sequence. +that is not part of one of the forms listed below is removed, and the next +character is included in the word verbatim, which allows the inclusion of +characters that would normally be interpreted, namely whitespace, braces, +brackets, double quote, dollar sign, and backslash. The following sequences +are replaced as described: .RS .RS .RS .TP 7 \e\fBa\fR -Audible alert (bell) (Unicode U+000007). +Audible alert (bell) (U+7). .TP 7 \e\fBb\fR -Backspace (Unicode U+000008). +Backspace (U+8). .TP 7 \e\fBf\fR -Form feed (Unicode U+00000C). +Form feed (U+C). .TP 7 \e\fBn\fR -Newline (Unicode U+00000A). +Newline (U+A). .TP 7 \e\fBr\fR -Carriage-return (Unicode U+00000D). +Carriage-return (U+D). .TP 7 \e\fBt\fR -Tab (Unicode U+000009). +Tab (U+9). .TP 7 \e\fBv\fR -Vertical tab (Unicode U+00000B). +Vertical tab (U+B). .TP 7 \e\fB\fIwhiteSpace\fR . -A single space character replaces the backslash, newline, and all spaces -and tabs after the newline. This backslash sequence is unique in that it -is replaced in a separate pre-pass before the command is actually parsed. -This means that it will be replaced even when it occurs between braces, -and the resulting space will be treated as a word separator if it is not -in braces or quotes. +Newline preceded by an odd number of backslashes, along with the consecutive +spaces and tabs that immediately follow it, is replaced by a single space. +Because this happens before the command is split into words, it occurs even +within braced words, and if the resulting space may subsequently be treated as +a word delimiter. .TP 7 \e\e Backslash @@ -195,78 +170,30 @@ Backslash .TP 7 \e\fIooo\fR . -The digits \fIooo\fR (one, two, or three of them) give a eight-bit octal -value for the Unicode character that will be inserted, in the range -\fI000\fR\(en\fI377\fR (i.e., the range U+000000\(enU+0000FF). -The parser will stop just before this range overflows, or when -the maximum of three digits is reached. The upper bits of the Unicode -character will be 0. +Up to three octal digits form an eight-bit value for a Unicode character in the +range \fI0\fR\(en\fI377\fR, i.e. U+0\(enU+FF. Only the digits that result in a +number in this range are consumed. .TP 7 \e\fBx\fIhh\fR . -The hexadecimal digits \fIhh\fR (one or two of them) give an eight-bit -hexadecimal value for the Unicode character that will be inserted. The upper -bits of the Unicode character will be 0 (i.e., the character will be in the -range U+000000\(enU+0000FF). +Up to two hexadecimal digits form an eight-bit value for a Unicode character in +the range \fI0\fR\(en\fIFF\fR. .TP 7 \e\fBu\fIhhhh\fR . -The hexadecimal digits \fIhhhh\fR (one, two, three, or four of them) give a -sixteen-bit hexadecimal value for the Unicode character that will be -inserted. The upper bits of the Unicode character will be 0 (i.e., the -character will be in the range U+000000\(enU+00FFFF). +Up to four hexadecimal digits form a 16-bit value for a Unicode character in +the range \fI0\fR\(en\fIFFFF\fR. .TP 7 \e\fBU\fIhhhhhhhh\fR . -The hexadecimal digits \fIhhhhhhhh\fR (one up to eight of them) give a -twenty-one-bit hexadecimal value for the Unicode character that will be -inserted, in the range U+000000\(enU+10FFFF. The parser will stop just -before this range overflows, or when the maximum of eight digits -is reached. The upper bits of the Unicode character will be 0. -.RE +Up to eight hexadecimal digits form a 21-bit value for a Unicode character in +the range \fI0\fR\(en\fI10FFFF\fR. Only the digits that result in a number in +this range are consumed. .RE -.PP -Backslash substitution is not performed on words enclosed in braces, -except for backslash-newline as described above. .RE -.IP "[10] \fBComments.\fR" -If a hash character -.PQ # -appears at a point where Tcl is -expecting the first character of the first word of a command, -then the hash character and the characters that follow it, up -through the next newline, are treated as a comment and ignored. -The comment character only has significance when it appears -at the beginning of a command. -.IP "[11] \fBOrder of substitution.\fR" -Each character is processed exactly once by the Tcl interpreter -as part of creating the words of a command. -For example, if variable substitution occurs then no further -substitutions are performed on the value of the variable; the -value is inserted into the word verbatim. -If command substitution occurs then the nested command is -processed entirely by the recursive call to the Tcl interpreter; -no substitutions are performed before making the recursive -call and no additional substitutions are performed on the result -of the nested script. -.RS .PP -Substitutions take place from left to right, and each substitution is -evaluated completely before attempting to evaluate the next. Thus, a -sequence like -.PP -.CS -set y [set x 0][incr x][incr x] -.CE -.PP -will always set the variable \fIy\fR to the value, \fI012\fR. .RE -.IP "[12] \fBSubstitution and word boundaries.\fR" -Substitutions do not affect the word boundaries of a command, -except for argument expansion as specified in rule [5]. -For example, during variable substitution the entire value of -the variable becomes part of a single word, even if the variable's -value contains spaces. +. .SH KEYWORDS backslash, command, comment, script, substitution, variable '\" Local Variables: -- cgit v0.12 From bb2f06fc739eb91a9f1499fbfbc4fa346172660e Mon Sep 17 00:00:00 2001 From: pooryorick Date: Tue, 28 Feb 2023 11:42:15 +0000 Subject: Reverted [d156af9fb76dd2f4] and removed tests io-52.20 io-75.6 io-75.7, as this commit, intended to fix issue [b8f575aa2398b0e4], breaks the semantics of [read] and [gets]. Such a change would require an accepted TIP. See [b8f575aa2398b0e4] for further discussion. jn: @pouryorick See [b8f575aa2398b0e4] for the reason why this commit is not appropriate: It gets core-8-branch back in the buggy state it was, without even providing a real solution everyone agrees on. You shouldn't revert my patch just because I reverted yours. pooryorick: As I explained, the reason for this reversion is that it hard-codes an unapproved change in the semantics of [read] and [gets] into the test suite. Jan, your statement that it's a "revenge" reversion is false. I spent a month trying to find some alternative to this reversion before actually performing it. A commit that codifes in its tests changes in semantcs to [read]/[gets] simply shouldn't be on core-8-branch. --- generic/tclIO.c | 4 ++-- tests/io.test | 63 --------------------------------------------------------- 2 files changed, 2 insertions(+), 65 deletions(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index 2e0cd1f..c96a406 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -7588,7 +7588,7 @@ Tcl_Eof( ChannelState *statePtr = ((Channel *) chan)->state; /* State of real channel structure. */ - return (GotFlag(statePtr, CHANNEL_EOF) && !GotFlag(statePtr, CHANNEL_ENCODING_ERROR)) ? 1 : 0; + return GotFlag(statePtr, CHANNEL_EOF) ? 1 : 0; } /* @@ -8283,7 +8283,7 @@ Tcl_SetChannelOption( statePtr->inputEncodingFlags = TCL_ENCODING_START; statePtr->outputEncodingState = NULL; statePtr->outputEncodingFlags = TCL_ENCODING_START; - ResetFlag(statePtr, CHANNEL_NEED_MORE_DATA|CHANNEL_ENCODING_ERROR); + ResetFlag(statePtr, CHANNEL_NEED_MORE_DATA); UpdateInterest(chanPtr); return TCL_OK; } else if (HaveOpt(2, "-eofchar")) { diff --git a/tests/io.test b/tests/io.test index 865ff7e..6821ff3 100644 --- a/tests/io.test +++ b/tests/io.test @@ -7609,27 +7609,6 @@ test io-52.19 {coverage of eofChar handling} { close $out file size $path(test2) } 8 -test io-52.20 {TclCopyChannel & encodings} -setup { - set out [open $path(utf8-fcopy.txt) w] - fconfigure $out -encoding utf-8 -translation lf - puts $out "Á" - close $out -} -constraints {fcopy} -body { - # binary to encoding => the input has to be - # in utf-8 to make sense to the encoder - - set in [open $path(utf8-fcopy.txt) r] - set out [open $path(kyrillic.txt) w] - - # Using "-encoding ascii" means reading the "Á" gives an error - fconfigure $in -encoding ascii -strictencoding 1 - fconfigure $out -encoding koi8-r -translation lf - - fcopy $in $out -} -cleanup { - close $in - close $out -} -returnCodes 1 -match glob -result {error reading "file*": illegal byte sequence} test io-52.21 {TclCopyChannel & encodings} -setup { set out [open $path(utf8-fcopy.txt) w] fconfigure $out -encoding utf-8 -translation lf @@ -9143,48 +9122,6 @@ test io-75.5 {invalid utf-8 encoding read is ignored (-nocomplainencoding 1)} -s removeFile io-75.5 } -result 4181 -test io-75.6 {invalid utf-8 encoding read is not ignored (-strictencoding 1)} -setup { - set fn [makeFile {} io-75.6] - set f [open $fn w+] - fconfigure $f -encoding binary - # \x81 is invalid in utf-8 - puts -nonewline $f A\x81 - flush $f - seek $f 0 - fconfigure $f -encoding utf-8 -buffering none -eofchar "" -translation lf -strictencoding 1 -} -body { - set d [read $f] - binary scan $d H* hd - lappend hd [catch {read $f} msg] - close $f - lappend hd $msg -} -cleanup { - removeFile io-75.6 -} -match glob -result {41 1 {error reading "*": illegal byte sequence}} - -test io-75.7 {invalid utf-8 encoding eof handling (-strictencoding 1)} -setup { - set fn [makeFile {} io-75.7] - set f [open $fn w+] - fconfigure $f -encoding binary - # \xA1 is invalid in utf-8. -eofchar is not detected, because it comes later. - puts -nonewline $f A\xA1\x1A - flush $f - seek $f 0 - fconfigure $f -encoding utf-8 -buffering none -eofchar \x1A -translation lf -strictencoding 1 -} -body { - set d [read $f] - binary scan $d H* hd - lappend hd [eof $f] - lappend hd [catch {read $f} msg] - lappend hd $msg - fconfigure $f -encoding iso8859-1 - lappend hd [read $f];# We changed encoding, so now we can read the \xA1 - close $f - set hd -} -cleanup { - removeFile io-75.7 -} -match glob -result {41 0 1 {error reading "*": illegal byte sequence} ¡} - test io-75.8 {invalid utf-8 encoding eof handling (-strictencoding 1)} -setup { set fn [makeFile {} io-75.8] set f [open $fn w+] -- cgit v0.12 From 7661972907da4bc9f9d49d64cf6db1c0748936f9 Mon Sep 17 00:00:00 2001 From: pooryorick Date: Tue, 28 Feb 2023 11:57:03 +0000 Subject: Reverted [d156af9fb76dd2f4] and removed tests io-52.20 io-75.6 io-75.7, as this commit, intended to fix issue [b8f575aa2398b0e4], breaks the semantics of [read] and [gets]. Such a change would require an accepted TIP. See [b8f575aa2398b0e4] for further discussion. jn: @pouryorick See [b8f575aa2398b0e4] for the reason why this commit is not appropriate: It gets core-8-branch back in the buggy state it was, without even providing a real solution everyone agrees on. You shouldn't revert my patch just because I reverted yours. pooryorick: As I explained, the reason for this reversion is that it hard-codes an unapproved change in the semantics of [read] and [gets] into the test suite. Jan, your statement that it's a "revenge" reversion is false. I spent a month trying to find some alternative to this reversion before actually performing it. A commit that codifes in its tests changes in semantcs to [read]/[gets] simply shouldn't be on core-8-branch. --- generic/tclIO.c | 4 ++-- tests/io.test | 63 --------------------------------------------------------- 2 files changed, 2 insertions(+), 65 deletions(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index 768a5c5..36889c7 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -7552,7 +7552,7 @@ Tcl_Eof( ChannelState *statePtr = ((Channel *) chan)->state; /* State of real channel structure. */ - return (GotFlag(statePtr, CHANNEL_EOF) && !GotFlag(statePtr, CHANNEL_ENCODING_ERROR)) ? 1 : 0; + return GotFlag(statePtr, CHANNEL_EOF) ? 1 : 0; } /* @@ -8223,7 +8223,7 @@ Tcl_SetChannelOption( statePtr->inputEncodingFlags = TCL_ENCODING_START; statePtr->outputEncodingState = NULL; statePtr->outputEncodingFlags = TCL_ENCODING_START; - ResetFlag(statePtr, CHANNEL_NEED_MORE_DATA|CHANNEL_ENCODING_ERROR); + ResetFlag(statePtr, CHANNEL_NEED_MORE_DATA); UpdateInterest(chanPtr); return TCL_OK; } else if (HaveOpt(2, "-eofchar")) { diff --git a/tests/io.test b/tests/io.test index c245add..59b6c66 100644 --- a/tests/io.test +++ b/tests/io.test @@ -7692,27 +7692,6 @@ test io-52.19 {coverage of eofChar handling} { close $out file size $path(test2) } 8 -test io-52.20 {TclCopyChannel & encodings} -setup { - set out [open $path(utf8-fcopy.txt) w] - fconfigure $out -encoding utf-8 -translation lf - puts $out "Á" - close $out -} -constraints {fcopy} -body { - # binary to encoding => the input has to be - # in utf-8 to make sense to the encoder - - set in [open $path(utf8-fcopy.txt) r] - set out [open $path(kyrillic.txt) w] - - # Using "-encoding ascii" means reading the "Á" gives an error - fconfigure $in -encoding ascii -strictencoding 1 - fconfigure $out -encoding koi8-r -translation lf - - fcopy $in $out -} -cleanup { - close $in - close $out -} -returnCodes 1 -match glob -result {error reading "file*": illegal byte sequence} test io-52.21 {TclCopyChannel & encodings} -setup { set out [open $path(utf8-fcopy.txt) w] fconfigure $out -encoding utf-8 -translation lf @@ -9223,48 +9202,6 @@ test io-75.5 {invalid utf-8 encoding read is ignored (-nocomplainencoding 1)} -s removeFile io-75.5 } -result 4181 -test io-75.6 {invalid utf-8 encoding read is not ignored (-strictencoding 1)} -setup { - set fn [makeFile {} io-75.6] - set f [open $fn w+] - fconfigure $f -encoding binary - # \x81 is invalid in utf-8 - puts -nonewline $f A\x81 - flush $f - seek $f 0 - fconfigure $f -encoding utf-8 -buffering none -eofchar "" -translation lf -strictencoding 1 -} -body { - set d [read $f] - binary scan $d H* hd - lappend hd [catch {read $f} msg] - close $f - lappend hd $msg -} -cleanup { - removeFile io-75.6 -} -match glob -result {41 1 {error reading "*": illegal byte sequence}} - -test io-75.7 {invalid utf-8 encoding eof handling (-strictencoding 1)} -setup { - set fn [makeFile {} io-75.7] - set f [open $fn w+] - fconfigure $f -encoding binary - # \xA1 is invalid in utf-8. -eofchar is not detected, because it comes later. - puts -nonewline $f A\xA1\x1A - flush $f - seek $f 0 - fconfigure $f -encoding utf-8 -buffering none -eofchar \x1A -translation lf -strictencoding 1 -} -body { - set d [read $f] - binary scan $d H* hd - lappend hd [eof $f] - lappend hd [catch {read $f} msg] - lappend hd $msg - fconfigure $f -encoding iso8859-1 - lappend hd [read $f];# We changed encoding, so now we can read the \xA1 - close $f - set hd -} -cleanup { - removeFile io-75.7 -} -match glob -result {41 0 1 {error reading "*": illegal byte sequence} ¡} - test io-75.8 {invalid utf-8 encoding eof handling (-strictencoding 1)} -setup { set fn [makeFile {} io-75.8] set f [open $fn w+] -- cgit v0.12 From 70e40dd53dda7e7fca32fc8aec28cf6504e7ffec Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Tue, 28 Feb 2023 12:16:10 +0000 Subject: Restore previous behavior for non-blocking mode, as for this mode the semantics of [read]/[gets] were not broken. This was the 'some agreement'. The change in line 8286 is necessary for both blocking and non-blocking mode: Whenver the encoding change we need to reset the CHANNEL_ENCODING_ERROR flag. --- generic/tclIO.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index c96a406..ae09690 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -7588,6 +7588,10 @@ Tcl_Eof( ChannelState *statePtr = ((Channel *) chan)->state; /* State of real channel structure. */ + if (GotFlag(statePtr, CHANNEL_NONBLOCKING) + && GotFlag(statePtr, CHANNEL_ENCODING_ERROR) { + return 0; + } return GotFlag(statePtr, CHANNEL_EOF) ? 1 : 0; } @@ -8283,7 +8287,7 @@ Tcl_SetChannelOption( statePtr->inputEncodingFlags = TCL_ENCODING_START; statePtr->outputEncodingState = NULL; statePtr->outputEncodingFlags = TCL_ENCODING_START; - ResetFlag(statePtr, CHANNEL_NEED_MORE_DATA); + ResetFlag(statePtr, CHANNEL_NEED_MORE_DATA|CHANNEL_ENCODING_ERROR); UpdateInterest(chanPtr); return TCL_OK; } else if (HaveOpt(2, "-eofchar")) { -- cgit v0.12 From 1eac8ab060855f0454c234be78839a46d8a9241e Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Tue, 28 Feb 2023 12:25:34 +0000 Subject: Move setting of profile in flags parameter to lower level functions in case they are called directly --- generic/tclCmdAH.c | 11 +++-------- generic/tclEncoding.c | 19 +++++++++++++++---- generic/tclInt.h | 2 +- generic/tclTestObj.c | 2 +- 4 files changed, 20 insertions(+), 14 deletions(-) diff --git a/generic/tclCmdAH.c b/generic/tclCmdAH.c index 19a5bc3..ff0d00f 100644 --- a/generic/tclCmdAH.c +++ b/generic/tclCmdAH.c @@ -611,16 +611,11 @@ numArgsError: /* ONLY jump here if nothing needs to be freed!!! */ } switch (optIndex) { case PROFILE: - if (TclEncodingProfileNameToId( - interp, Tcl_GetString(objv[argIndex]), &profile) - != TCL_OK) { + if (TclEncodingProfileNameToId(interp, + Tcl_GetString(objv[argIndex]), + &profile) != TCL_OK) { return TCL_ERROR; } -#ifdef NOTNEEDED - /* TODO - next line probably not needed as the conversion - functions already take care of mapping profile to flags */ - profile = TclEncodingExternalFlagsToInternal(profile); -#endif break; case FAILINDEX: failVarObj = objv[argIndex]; diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 00ca5e8..05d231f 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -1301,7 +1301,6 @@ Tcl_ExternalToUtfDStringEx( srcLen = encodingPtr->lengthProc(src); } - flags = TclEncodingExternalFlagsToInternal(flags); flags |= TCL_ENCODING_START | TCL_ENCODING_END; if (encodingPtr->toUtfProc == UtfToUtfProc) { flags |= ENCODING_INPUT; @@ -1596,7 +1595,6 @@ Tcl_UtfToExternalDStringEx( srcLen = strlen(src); } - flags = TclEncodingExternalFlagsToInternal(flags); flags |= TCL_ENCODING_START | TCL_ENCODING_END; while (1) { result = encodingPtr->fromUtfProc(encodingPtr->clientData, src, @@ -2432,6 +2430,7 @@ BinaryProc( if (dstLen < 0) { dstLen = 0; } + flags = TclEncodingSetProfileFlags(flags); if ((flags & TCL_ENCODING_CHAR_LIMIT) && srcLen > *dstCharsPtr) { srcLen = *dstCharsPtr; } @@ -2499,6 +2498,7 @@ UtfToUtfProc( srcStart = src; srcEnd = src + srcLen; srcClose = srcEnd; + flags = TclEncodingSetProfileFlags(flags); if ((flags & TCL_ENCODING_END) == 0) { srcClose -= 6; } @@ -2721,6 +2721,7 @@ Utf32ToUtfProc( int result, numChars, charLimit = INT_MAX; int ch = 0, bytesLeft = srcLen % 4; + flags = TclEncodingSetProfileFlags(flags); flags |= PTR2INT(clientData); if (flags & TCL_ENCODING_CHAR_LIMIT) { charLimit = *dstCharsPtr; @@ -2874,6 +2875,7 @@ UtfToUtf32Proc( srcStart = src; srcEnd = src + srcLen; srcClose = srcEnd; + flags = TclEncodingSetProfileFlags(flags); if ((flags & TCL_ENCODING_END) == 0) { srcClose -= TCL_UTF_MAX; } @@ -2971,6 +2973,7 @@ Utf16ToUtfProc( int result, numChars, charLimit = INT_MAX; unsigned short ch = 0; + flags = TclEncodingSetProfileFlags(flags); flags |= PTR2INT(clientData); if (flags & TCL_ENCODING_CHAR_LIMIT) { charLimit = *dstCharsPtr; @@ -3110,6 +3113,7 @@ UtfToUtf16Proc( srcStart = src; srcEnd = src + srcLen; srcClose = srcEnd; + flags = TclEncodingSetProfileFlags(flags); if ((flags & TCL_ENCODING_END) == 0) { srcClose -= TCL_UTF_MAX; } @@ -3215,6 +3219,7 @@ UtfToUcs2Proc( int result, numChars, len; Tcl_UniChar ch = 0; + flags = TclEncodingSetProfileFlags(flags); flags |= PTR2INT(clientData); srcStart = src; srcEnd = src + srcLen; @@ -3337,6 +3342,7 @@ TableToUtfProc( const unsigned short *pageZero; TableEncodingData *dataPtr = (TableEncodingData *)clientData; + flags = TclEncodingSetProfileFlags(flags); if (flags & TCL_ENCODING_CHAR_LIMIT) { charLimit = *dstCharsPtr; } @@ -3464,6 +3470,7 @@ TableFromUtfProc( srcStart = src; srcEnd = src + srcLen; srcClose = srcEnd; + flags = TclEncodingSetProfileFlags(flags); if ((flags & TCL_ENCODING_END) == 0) { srcClose -= TCL_UTF_MAX; } @@ -3570,6 +3577,7 @@ Iso88591ToUtfProc( const char *dstEnd, *dstStart; int result, numChars, charLimit = INT_MAX; + flags = TclEncodingSetProfileFlags(flags); if (flags & TCL_ENCODING_CHAR_LIMIT) { charLimit = *dstCharsPtr; } @@ -3654,6 +3662,7 @@ Iso88591FromUtfProc( srcStart = src; srcEnd = src + srcLen; srcClose = srcEnd; + flags = TclEncodingSetProfileFlags(flags); if ((flags & TCL_ENCODING_END) == 0) { srcClose -= TCL_UTF_MAX; } @@ -3801,6 +3810,7 @@ EscapeToUtfProc( int state, result, numChars, charLimit = INT_MAX; const char *dstStart, *dstEnd; + flags = TclEncodingSetProfileFlags(flags); if (flags & TCL_ENCODING_CHAR_LIMIT) { charLimit = *dstCharsPtr; } @@ -4024,6 +4034,7 @@ EscapeFromUtfProc( srcStart = src; srcEnd = src + srcLen; srcClose = srcEnd; + flags = TclEncodingSetProfileFlags(flags); if ((flags & TCL_ENCODING_END) == 0) { srcClose -= TCL_UTF_MAX; } @@ -4463,7 +4474,7 @@ TclEncodingProfileIdToName( /* *------------------------------------------------------------------------ * - * TclEncodingExternalFlagsToInternal -- + * TclEncodingSetProfileFlags -- * * Maps the flags supported in the encoding C API's to internal flags. * @@ -4482,7 +4493,7 @@ TclEncodingProfileIdToName( * *------------------------------------------------------------------------ */ -int TclEncodingExternalFlagsToInternal(int flags) +int TclEncodingSetProfileFlags(int flags) { if (flags & TCL_ENCODING_STOPONERROR) { TCL_ENCODING_PROFILE_SET(flags, TCL_ENCODING_PROFILE_STRICT); diff --git a/generic/tclInt.h b/generic/tclInt.h index 538b177..bf5310b 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -2890,7 +2890,7 @@ TclEncodingProfileNameToId(Tcl_Interp *interp, int *profilePtr); MODULE_SCOPE const char *TclEncodingProfileIdToName(Tcl_Interp *interp, int profileId); -MODULE_SCOPE int TclEncodingExternalFlagsToInternal(int flags); +MODULE_SCOPE int TclEncodingSetProfileFlags(int flags); MODULE_SCOPE void TclGetEncodingProfiles(Tcl_Interp *interp); /* diff --git a/generic/tclTestObj.c b/generic/tclTestObj.c index fa91d67..4a2032c 100644 --- a/generic/tclTestObj.c +++ b/generic/tclTestObj.c @@ -1514,7 +1514,7 @@ TeststringobjCmd( Tcl_SetObjResult(interp, varPtr[varIndex]); break; case 13: /* newunicode*/ - unicode = ckalloc((objc - 3) * sizeof(Tcl_UniChar)); + unicode = (unsigned short *) ckalloc((objc - 3) * sizeof(Tcl_UniChar)); for (i = 0; i < (objc - 3); ++i) { int val; if (Tcl_GetIntFromObj(interp, objv[i + 3], &val) != TCL_OK) { -- cgit v0.12 From bb80189110af1df66a42c1d79285638ebc54f038 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Tue, 28 Feb 2023 12:43:53 +0000 Subject: Missing ')' --- generic/tclIO.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index ae09690..6da1345 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -7589,7 +7589,7 @@ Tcl_Eof( /* State of real channel structure. */ if (GotFlag(statePtr, CHANNEL_NONBLOCKING) - && GotFlag(statePtr, CHANNEL_ENCODING_ERROR) { + && GotFlag(statePtr, CHANNEL_ENCODING_ERROR)) { return 0; } return GotFlag(statePtr, CHANNEL_EOF) ? 1 : 0; -- cgit v0.12 From 4227ef37bbd67235ead035bb544b3bcf864b1e87 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Tue, 28 Feb 2023 13:33:04 +0000 Subject: Put back testcase io-52.20, and re-fix [4a7397e0b3] --- generic/tclIO.c | 7 ++++++- generic/tclIO.h | 2 ++ tests/io.test | 21 +++++++++++++++++++++ 3 files changed, 29 insertions(+), 1 deletion(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index 6da1345..da06171 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -7588,7 +7588,7 @@ Tcl_Eof( ChannelState *statePtr = ((Channel *) chan)->state; /* State of real channel structure. */ - if (GotFlag(statePtr, CHANNEL_NONBLOCKING) + if (GotFlag(statePtr, CHANNEL_NONBLOCKING|CHANNEL_FCOPY) && GotFlag(statePtr, CHANNEL_ENCODING_ERROR)) { return 0; } @@ -9803,6 +9803,7 @@ CopyData( * the bottom of the stack. */ + SetFlag(inStatePtr, CHANNEL_FCOPY); inBinary = (inStatePtr->encoding == NULL); outBinary = (outStatePtr->encoding == NULL); sameEncoding = inStatePtr->encoding == outStatePtr->encoding @@ -9918,6 +9919,7 @@ CopyData( TclDecrRefCount(bufObj); bufObj = NULL; } + ResetFlag(inStatePtr, CHANNEL_FCOPY); return TCL_OK; } } @@ -10009,6 +10011,7 @@ CopyData( TclDecrRefCount(bufObj); bufObj = NULL; } + ResetFlag(inStatePtr, CHANNEL_FCOPY); return TCL_OK; } @@ -10031,6 +10034,7 @@ CopyData( TclDecrRefCount(bufObj); bufObj = NULL; } + ResetFlag(inStatePtr, CHANNEL_FCOPY); return TCL_OK; } } /* while */ @@ -10083,6 +10087,7 @@ CopyData( } } } + ResetFlag(inStatePtr, CHANNEL_FCOPY); return result; } diff --git a/generic/tclIO.h b/generic/tclIO.h index a69e990..689067f 100644 --- a/generic/tclIO.h +++ b/generic/tclIO.h @@ -233,6 +233,8 @@ typedef struct ChannelState { * flushed after every newline. */ #define CHANNEL_UNBUFFERED (1<<5) /* Output to the channel must always * be flushed immediately. */ +#define CHANNEL_FCOPY (1<<6) /* Channel is currently doing an fcopy + * mode. */ #define BG_FLUSH_SCHEDULED (1<<7) /* A background flush of the queued * output buffers has been * scheduled. */ diff --git a/tests/io.test b/tests/io.test index 6821ff3..dd291dd 100644 --- a/tests/io.test +++ b/tests/io.test @@ -7609,6 +7609,27 @@ test io-52.19 {coverage of eofChar handling} { close $out file size $path(test2) } 8 +test io-52.20 {TclCopyChannel & encodings} -setup { + set out [open $path(utf8-fcopy.txt) w] + fconfigure $out -encoding utf-8 -translation lf + puts $out "Á" + close $out +} -constraints {fcopy} -body { + # binary to encoding => the input has to be + # in utf-8 to make sense to the encoder + + set in [open $path(utf8-fcopy.txt) r] + set out [open $path(kyrillic.txt) w] + + # Using "-encoding ascii" means reading the "Á" gives an error + fconfigure $in -encoding ascii -strictencoding 1 + fconfigure $out -encoding koi8-r -translation lf + + fcopy $in $out +} -cleanup { + close $in + close $out +} -returnCodes 1 -match glob -result {error reading "file*": illegal byte sequence} test io-52.21 {TclCopyChannel & encodings} -setup { set out [open $path(utf8-fcopy.txt) w] fconfigure $out -encoding utf-8 -translation lf -- cgit v0.12 From 3d177bd8b588eb3f64773a86cabc290208e031a5 Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Tue, 28 Feb 2023 14:08:19 +0000 Subject: int -> Tcl_Size to match TIP --- generic/tcl.decls | 4 ++-- generic/tclDecls.h | 10 ++++++---- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/generic/tcl.decls b/generic/tcl.decls index a789ef6..f2ba187 100644 --- a/generic/tcl.decls +++ b/generic/tcl.decls @@ -2445,12 +2445,12 @@ declare 657 { # TIP 656 declare 658 { int Tcl_ExternalToUtfDStringEx(Tcl_Interp *interp, Tcl_Encoding encoding, - const char *src, int srcLen, int flags, Tcl_DString *dsPtr, + const char *src, Tcl_Size srcLen, int flags, Tcl_DString *dsPtr, Tcl_Size *errorLocationPtr) } declare 659 { int Tcl_UtfToExternalDStringEx(Tcl_Interp *interp, Tcl_Encoding encoding, - const char *src, int srcLen, int flags, Tcl_DString *dsPtr, + const char *src, Tcl_Size srcLen, int flags, Tcl_DString *dsPtr, Tcl_Size *errorLocationPtr) } diff --git a/generic/tclDecls.h b/generic/tclDecls.h index fbfa8a1..adad630 100644 --- a/generic/tclDecls.h +++ b/generic/tclDecls.h @@ -1957,12 +1957,14 @@ EXTERN int Tcl_UniCharIsUnicode(int ch); /* 658 */ EXTERN int Tcl_ExternalToUtfDStringEx(Tcl_Interp *interp, Tcl_Encoding encoding, const char *src, - int srcLen, int flags, Tcl_DString *dsPtr, + Tcl_Size srcLen, int flags, + Tcl_DString *dsPtr, Tcl_Size *errorLocationPtr); /* 659 */ EXTERN int Tcl_UtfToExternalDStringEx(Tcl_Interp *interp, Tcl_Encoding encoding, const char *src, - int srcLen, int flags, Tcl_DString *dsPtr, + Tcl_Size srcLen, int flags, + Tcl_DString *dsPtr, Tcl_Size *errorLocationPtr); /* 660 */ EXTERN int Tcl_AsyncMarkFromSignal(Tcl_AsyncHandler async, @@ -2743,8 +2745,8 @@ typedef struct TclStubs { const char * (*tcl_UtfNext) (const char *src); /* 655 */ const char * (*tcl_UtfPrev) (const char *src, const char *start); /* 656 */ int (*tcl_UniCharIsUnicode) (int ch); /* 657 */ - int (*tcl_ExternalToUtfDStringEx) (Tcl_Interp *interp, Tcl_Encoding encoding, const char *src, int srcLen, int flags, Tcl_DString *dsPtr, Tcl_Size *errorLocationPtr); /* 658 */ - int (*tcl_UtfToExternalDStringEx) (Tcl_Interp *interp, Tcl_Encoding encoding, const char *src, int srcLen, int flags, Tcl_DString *dsPtr, Tcl_Size *errorLocationPtr); /* 659 */ + int (*tcl_ExternalToUtfDStringEx) (Tcl_Interp *interp, Tcl_Encoding encoding, const char *src, Tcl_Size srcLen, int flags, Tcl_DString *dsPtr, Tcl_Size *errorLocationPtr); /* 658 */ + int (*tcl_UtfToExternalDStringEx) (Tcl_Interp *interp, Tcl_Encoding encoding, const char *src, Tcl_Size srcLen, int flags, Tcl_DString *dsPtr, Tcl_Size *errorLocationPtr); /* 659 */ int (*tcl_AsyncMarkFromSignal) (Tcl_AsyncHandler async, int sigNumber); /* 660 */ int (*tclListObjGetElements) (Tcl_Interp *interp, Tcl_Obj *listPtr, size_t *objcPtr, Tcl_Obj ***objvPtr); /* 661 */ int (*tclListObjLength) (Tcl_Interp *interp, Tcl_Obj *listPtr, size_t *lengthPtr); /* 662 */ -- cgit v0.12 From 00e1068c039491b579117c6b38d7d415cb345e68 Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Tue, 28 Feb 2023 17:16:25 +0000 Subject: Cherrypick [070225e33d]. Move setting of profile flags to lower level functions in case they are called directly --- generic/tclCmdAH.c | 11 +++-------- generic/tclEncoding.c | 19 +++++++++++++++---- generic/tclInt.h | 2 +- generic/tclTestObj.c | 17 +++++++++++++++++ tests/cmdAH.test | 5 ++++- 5 files changed, 40 insertions(+), 14 deletions(-) diff --git a/generic/tclCmdAH.c b/generic/tclCmdAH.c index c60a077..7fab2f0 100644 --- a/generic/tclCmdAH.c +++ b/generic/tclCmdAH.c @@ -478,16 +478,11 @@ numArgsError: /* ONLY jump here if nothing needs to be freed!!! */ } switch (optIndex) { case PROFILE: - if (TclEncodingProfileNameToId( - interp, Tcl_GetString(objv[argIndex]), &profile) - != TCL_OK) { + if (TclEncodingProfileNameToId(interp, + Tcl_GetString(objv[argIndex]), + &profile) != TCL_OK) { return TCL_ERROR; } -#ifdef NOTNEEDED - /* TODO - next line probably not needed as the conversion - functions already take care of mapping profile to flags */ - profile = TclEncodingExternalFlagsToInternal(profile); -#endif break; case FAILINDEX: failVarObj = objv[argIndex]; diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 68f22b0..a208270 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -1239,7 +1239,6 @@ Tcl_ExternalToUtfDStringEx( srcLen = encodingPtr->lengthProc(src); } - flags = TclEncodingExternalFlagsToInternal(flags); flags |= TCL_ENCODING_START | TCL_ENCODING_END; if (encodingPtr->toUtfProc == UtfToUtfProc) { flags |= ENCODING_INPUT; @@ -1533,7 +1532,6 @@ Tcl_UtfToExternalDStringEx( srcLen = strlen(src); } - flags = TclEncodingExternalFlagsToInternal(flags); flags |= TCL_ENCODING_START | TCL_ENCODING_END; while (1) { result = encodingPtr->fromUtfProc(encodingPtr->clientData, src, @@ -2369,6 +2367,7 @@ BinaryProc( if (dstLen < 0) { dstLen = 0; } + flags = TclEncodingSetProfileFlags(flags); if ((flags & TCL_ENCODING_CHAR_LIMIT) && srcLen > *dstCharsPtr) { srcLen = *dstCharsPtr; } @@ -2436,6 +2435,7 @@ UtfToUtfProc( srcStart = src; srcEnd = src + srcLen; srcClose = srcEnd; + flags = TclEncodingSetProfileFlags(flags); if ((flags & TCL_ENCODING_END) == 0) { srcClose -= 6; } @@ -2661,6 +2661,7 @@ Utf32ToUtfProc( int result, numChars, charLimit = INT_MAX; int ch, bytesLeft = srcLen % 4; + flags = TclEncodingSetProfileFlags(flags); flags |= PTR2INT(clientData); if (flags & TCL_ENCODING_CHAR_LIMIT) { charLimit = *dstCharsPtr; @@ -2791,6 +2792,7 @@ UtfToUtf32Proc( srcStart = src; srcEnd = src + srcLen; srcClose = srcEnd; + flags = TclEncodingSetProfileFlags(flags); if ((flags & TCL_ENCODING_END) == 0) { srcClose -= TCL_UTF_MAX; } @@ -2888,6 +2890,7 @@ Utf16ToUtfProc( int result, numChars, charLimit = INT_MAX; unsigned short ch = 0; + flags = TclEncodingSetProfileFlags(flags); flags |= PTR2INT(clientData); if (flags & TCL_ENCODING_CHAR_LIMIT) { charLimit = *dstCharsPtr; @@ -3027,6 +3030,7 @@ UtfToUtf16Proc( srcStart = src; srcEnd = src + srcLen; srcClose = srcEnd; + flags = TclEncodingSetProfileFlags(flags); if ((flags & TCL_ENCODING_END) == 0) { srcClose -= TCL_UTF_MAX; } @@ -3132,6 +3136,7 @@ UtfToUcs2Proc( int result, numChars, len; Tcl_UniChar ch = 0; + flags = TclEncodingSetProfileFlags(flags); flags |= PTR2INT(clientData); srcStart = src; srcEnd = src + srcLen; @@ -3254,6 +3259,7 @@ TableToUtfProc( const unsigned short *pageZero; TableEncodingData *dataPtr = (TableEncodingData *)clientData; + flags = TclEncodingSetProfileFlags(flags); if (flags & TCL_ENCODING_CHAR_LIMIT) { charLimit = *dstCharsPtr; } @@ -3382,6 +3388,7 @@ TableFromUtfProc( srcStart = src; srcEnd = src + srcLen; srcClose = srcEnd; + flags = TclEncodingSetProfileFlags(flags); if ((flags & TCL_ENCODING_END) == 0) { srcClose -= TCL_UTF_MAX; } @@ -3488,6 +3495,7 @@ Iso88591ToUtfProc( const char *dstEnd, *dstStart; int result, numChars, charLimit = INT_MAX; + flags = TclEncodingSetProfileFlags(flags); if (flags & TCL_ENCODING_CHAR_LIMIT) { charLimit = *dstCharsPtr; } @@ -3572,6 +3580,7 @@ Iso88591FromUtfProc( srcStart = src; srcEnd = src + srcLen; srcClose = srcEnd; + flags = TclEncodingSetProfileFlags(flags); if ((flags & TCL_ENCODING_END) == 0) { srcClose -= TCL_UTF_MAX; } @@ -3719,6 +3728,7 @@ EscapeToUtfProc( int state, result, numChars, charLimit = INT_MAX; const char *dstStart, *dstEnd; + flags = TclEncodingSetProfileFlags(flags); if (flags & TCL_ENCODING_CHAR_LIMIT) { charLimit = *dstCharsPtr; } @@ -3942,6 +3952,7 @@ EscapeFromUtfProc( srcStart = src; srcEnd = src + srcLen; srcClose = srcEnd; + flags = TclEncodingSetProfileFlags(flags); if ((flags & TCL_ENCODING_END) == 0) { srcClose -= TCL_UTF_MAX; } @@ -4381,7 +4392,7 @@ TclEncodingProfileIdToName( /* *------------------------------------------------------------------------ * - * TclEncodingExternalFlagsToInternal -- + * TclEncodingSetProfileFlags -- * * Maps the flags supported in the encoding C API's to internal flags. * @@ -4400,7 +4411,7 @@ TclEncodingProfileIdToName( * *------------------------------------------------------------------------ */ -int TclEncodingExternalFlagsToInternal(int flags) +int TclEncodingSetProfileFlags(int flags) { if (flags & TCL_ENCODING_STOPONERROR) { TCL_ENCODING_PROFILE_SET(flags, TCL_ENCODING_PROFILE_STRICT); diff --git a/generic/tclInt.h b/generic/tclInt.h index 9a9c0ae..a90ac79 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -2886,7 +2886,7 @@ TclEncodingProfileNameToId(Tcl_Interp *interp, int *profilePtr); MODULE_SCOPE const char *TclEncodingProfileIdToName(Tcl_Interp *interp, int profileId); -MODULE_SCOPE int TclEncodingExternalFlagsToInternal(int flags); +MODULE_SCOPE int TclEncodingSetProfileFlags(int flags); MODULE_SCOPE void TclGetEncodingProfiles(Tcl_Interp *interp); /* diff --git a/generic/tclTestObj.c b/generic/tclTestObj.c index 131601d..833f39b 100644 --- a/generic/tclTestObj.c +++ b/generic/tclTestObj.c @@ -1497,6 +1497,23 @@ TeststringobjCmd( Tcl_AppendUnicodeToObj(varPtr[varIndex], unicode + length, size - length); Tcl_SetObjResult(interp, varPtr[varIndex]); break; + case 13: /* newunicode*/ + unicode = (Tcl_UniChar *) ckalloc((objc - 3) * sizeof(Tcl_UniChar)); + for (i = 0; i < (objc - 3); ++i) { + int val; + if (Tcl_GetIntFromObj(interp, objv[i + 3], &val) != TCL_OK) { + break; + } + unicode[i] = (Tcl_UniChar)val; + } + if (i < (objc-3)) { + ckfree(unicode); + return TCL_ERROR; + } + SetVarToObj(varPtr, varIndex, Tcl_NewUnicodeObj(unicode, objc - 3)); + Tcl_SetObjResult(interp, varPtr[varIndex]); + ckfree(unicode); + break; } return TCL_OK; diff --git a/tests/cmdAH.test b/tests/cmdAH.test index 634c3c4..5a48453 100644 --- a/tests/cmdAH.test +++ b/tests/cmdAH.test @@ -187,7 +187,10 @@ set "numargErrors(encoding convertto)" {wrong # args: should be "(encoding |::tc set "numargErrors(encoding names)" {wrong # args: should be "encoding names"} set "numargErrors(encoding profiles)" {wrong # args: should be "encoding profiles"} -source [file join [file dirname [info script]] encodingVectors.tcl] +# TODO - encodingVectors not currently in tcl9 branch +if {[file exists [file join [file dirname [info script]] encodingVectors.tcl]]} { + source [file join [file dirname [info script]] encodingVectors.tcl] +} # Maps utf-{16,32}{le,be} to utf-16, utf-32 and -- cgit v0.12 From a6b4ef3d29565b68fb8c7104b11b03a99e0b153e Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Wed, 1 Mar 2023 07:56:03 +0000 Subject: Fix [f8ef6b3670] crash. TclpSysAlloc macro was truncating size request to 32 bits on Windows. --- win/tclWinPort.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/win/tclWinPort.h b/win/tclWinPort.h index 2c01a6b..cc9453b 100644 --- a/win/tclWinPort.h +++ b/win/tclWinPort.h @@ -518,11 +518,11 @@ typedef DWORD_PTR * PDWORD_PTR; */ #define TclpSysAlloc(size) ((void*)HeapAlloc(GetProcessHeap(), \ - (DWORD)0, (DWORD)size)) + 0, size)) #define TclpSysFree(ptr) (HeapFree(GetProcessHeap(), \ - (DWORD)0, (HGLOBAL)ptr)) + 0, (HGLOBAL)ptr)) #define TclpSysRealloc(ptr, size) ((void*)HeapReAlloc(GetProcessHeap(), \ - (DWORD)0, (LPVOID)ptr, (DWORD)size)) + 0, (LPVOID)ptr, size)) /* This type is not defined in the Windows headers */ #define socklen_t int -- cgit v0.12 From 261c174d97c0b75e9c557c4c05514db34e52e58b Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Wed, 1 Mar 2023 11:06:17 +0000 Subject: Fix msvc build (with OPTS=symbols) --- generic/tclZlib.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/generic/tclZlib.c b/generic/tclZlib.c index a0e79ac..6278628 100644 --- a/generic/tclZlib.c +++ b/generic/tclZlib.c @@ -444,8 +444,8 @@ GenerateHeader( goto error; } else if (value != NULL) { Tcl_EncodingState state; - valueStr = Tcl_GetStringFromObj(value, &len); - result = Tcl_UtfToExternal(NULL, latin1enc, valueStr, len, + valueStr = Tcl_GetStringFromObj(value, &length); + result = Tcl_UtfToExternal(NULL, latin1enc, valueStr, length, TCL_ENCODING_START|TCL_ENCODING_END|TCL_ENCODING_STOPONERROR, &state, headerPtr->nativeCommentBuf, MAX_COMMENT_LEN-1, NULL, &len, NULL); @@ -476,8 +476,8 @@ GenerateHeader( goto error; } else if (value != NULL) { Tcl_EncodingState state; - valueStr = Tcl_GetStringFromObj(value, &len); - result = Tcl_UtfToExternal(NULL, latin1enc, valueStr, len, + valueStr = Tcl_GetStringFromObj(value, &length); + result = Tcl_UtfToExternal(NULL, latin1enc, valueStr, length, TCL_ENCODING_START|TCL_ENCODING_END|TCL_ENCODING_STOPONERROR, &state, headerPtr->nativeFilenameBuf, MAXPATHLEN-1, NULL, &len, NULL); @@ -570,7 +570,7 @@ ExtractHeader( } } - Tcl_ExternalToUtfDString(latin1enc, (char *) headerPtr->comment, -1, + (void)Tcl_ExternalToUtfDString(latin1enc, (char *) headerPtr->comment, -1, &tmp); SetValue(dictObj, "comment", Tcl_DStringToObj(&tmp)); } @@ -587,7 +587,7 @@ ExtractHeader( } } - Tcl_ExternalToUtfDString(latin1enc, (char *) headerPtr->name, -1, + (void)Tcl_ExternalToUtfDString(latin1enc, (char *) headerPtr->name, -1, &tmp); SetValue(dictObj, "filename", Tcl_DStringToObj(&tmp)); } -- cgit v0.12 From 30a9b333ba194339f5e8f68575626df0701b2a50 Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Wed, 1 Mar 2023 13:18:17 +0000 Subject: Bug [9a978f8323]: crash reading large files --- generic/tclIO.c | 26 ++++++++------ tests/io.test | 104 +++++++++++++++++++++++--------------------------------- 2 files changed, 58 insertions(+), 72 deletions(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index ff74a99..ce0dcc8 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -191,9 +191,9 @@ static int DetachChannel(Tcl_Interp *interp, Tcl_Channel chan); static void DiscardInputQueued(ChannelState *statePtr, int discardSavedBuffers); static void DiscardOutputQueued(ChannelState *chanPtr); -static int DoRead(Channel *chanPtr, char *dst, Tcl_Size bytesToRead, +static Tcl_Size DoRead(Channel *chanPtr, char *dst, Tcl_Size bytesToRead, int allowShortReads); -static int DoReadChars(Channel *chan, Tcl_Obj *objPtr, Tcl_Size toRead, +static Tcl_Size DoReadChars(Channel *chan, Tcl_Obj *objPtr, Tcl_Size toRead, int appendFlag); static int FilterInputBytes(Channel *chanPtr, GetsState *statePtr); @@ -5946,11 +5946,11 @@ Tcl_ReadChars( *--------------------------------------------------------------------------- */ -static int +static Tcl_Size DoReadChars( Channel *chanPtr, /* The channel to read. */ Tcl_Obj *objPtr, /* Input data is stored in this object. */ - Tcl_Size toRead, /* Maximum number of characters to store, or + Tcl_Size toRead, /* Maximum number of characters to store, or * TCL_INDEX_NONE to read all available data (up to EOF or * when channel blocks). */ int appendFlag) /* If non-zero, data read from the channel @@ -5961,7 +5961,8 @@ DoReadChars( ChannelState *statePtr = chanPtr->state; /* State info for channel */ ChannelBuffer *bufPtr; - int copied, copiedNow, result; + Tcl_Size copied; + int result; Tcl_Encoding encoding = statePtr->encoding; int binaryMode; #define UTF_EXPANSION_FACTOR 1024 @@ -6046,8 +6047,8 @@ DoReadChars( } ResetFlag(statePtr, CHANNEL_BLOCKED|CHANNEL_EOF); statePtr->inputEncodingFlags &= ~TCL_ENCODING_END; - for (copied = 0; toRead > 0; ) { - copiedNow = -1; + for (copied = 0; toRead > 0 || toRead == TCL_INDEX_NONE; ) { + int copiedNow = -1; if (statePtr->inQueueHead != NULL) { if (binaryMode) { copiedNow = ReadBytes(statePtr, objPtr, toRead); @@ -6093,7 +6094,9 @@ DoReadChars( } } else { copied += copiedNow; - toRead -= copiedNow; + if (toRead != TCL_INDEX_NONE) { + toRead -= copiedNow; /* Only decr if not reading whole file */ + } } } @@ -6269,7 +6272,7 @@ ReadChars( size_t size; dst = TclGetStringStorage(objPtr, &size) + numBytes; - dstLimit = size - numBytes; + dstLimit = (size - numBytes) > INT_MAX ? INT_MAX : (size - numBytes); } else { dst = TclGetString(objPtr) + numBytes; } @@ -9671,9 +9674,10 @@ CopyData( Tcl_Obj *cmdPtr, *errObj = NULL, *bufObj = NULL, *msg = NULL; Tcl_Channel inChan, outChan; ChannelState *inStatePtr, *outStatePtr; - int result = TCL_OK, size; + int result = TCL_OK; Tcl_Size sizeb; Tcl_WideInt total; + Tcl_WideInt size; /* TODO - be careful if total and size are made unsigned */ const char *buffer; int inBinary, outBinary, sameEncoding; /* Encoding control */ @@ -10011,7 +10015,7 @@ CopyData( *---------------------------------------------------------------------- */ -static int +static Tcl_Size DoRead( Channel *chanPtr, /* The channel from which to read. */ char *dst, /* Where to store input read. */ diff --git a/tests/io.test b/tests/io.test index 065eb4c..5b81dde 100644 --- a/tests/io.test +++ b/tests/io.test @@ -195,46 +195,50 @@ test io-1.9 {Tcl_WriteChars: WriteChars} { set sizes } {19 19 19 19 19} +proc testreadwrite {size {mode ""} args} { + set tmpfile [file join [temporaryDirectory] io-1.10.tmp] + set w [string repeat A $size] + try { + set fd [open $tmpfile w$mode] + try { + if {[llength $args]} { + fconfigure $fd {*}$args + } + puts -nonewline $fd $w + } finally { + close $fd + } + set fd [open $tmpfile r$mode] + try { + if {[llength $args]} { + fconfigure $fd {*}$args + } + set r [read $fd] + } finally { + close $fd + } + } finally { + file delete $tmpfile + } + string equal $w $r +} + test io-1.10 {WriteChars: large file (> INT_MAX). Bug 3d01d51bc4} -constraints { pointerIs64bit perf -} -setup { - set tmpfile [file join [temporaryDirectory] io-1.10.tmp] -} -cleanup { - file delete $tmpfile } -body { - set fd [open $tmpfile w] - puts -nonewline $fd [string repeat A 0x80000000] - close $fd - # TODO - Should really read it back in but large reads are not currently working! - file size $tmpfile -} -result 2147483648 + testreadwrite 0x80000000 +} -result 1 test io-1.11 {WriteChars: large file (> UINT_MAX). Bug 3d01d51bc4} -constraints { pointerIs64bit perf -} -setup { - set tmpfile [file join [temporaryDirectory] io-1.11.tmp] -} -cleanup { - file delete $tmpfile } -body { - set fd [open $tmpfile w] - puts -nonewline $fd [string repeat A 0x100000000] - close $fd - # TODO - Should really read it back in but large reads are not currently working! - file size $tmpfile -} -result 4294967296 + testreadwrite 0x100000000 "" -buffersize 1000000 +} -result 1 test io-1.12 {WriteChars: large file (== UINT_MAX). Bug 90ff9b7f73} -constraints { pointerIs64bit perf -} -setup { - set tmpfile [file join [temporaryDirectory] io-1.12.tmp] -} -cleanup { - file delete $tmpfile } -body { - set fd [open $tmpfile w] # *Exactly* UINT_MAX - separate bug from the general large file tests - puts -nonewline $fd [string repeat A 0xffffffff] - close $fd - # TODO - Should really read it back in but large reads are not currently working! - file size $tmpfile -} -result 4294967295 + testreadwrite 0xffffffff +} -result 1 test io-2.1 {WriteBytes} { # loop until all bytes are written @@ -277,47 +281,25 @@ test io-2.4 {WriteBytes: reset sawLF after each buffer} { close $f lappend x [contents $path(test1)] } [list "abcdefg\nhijklmno" "abcdefg\nhijklmnopqrstuvwxyz"] - test io-2.5 {WriteBytes: large file (> INT_MAX). Bug 3d01d51bc4} -constraints { pointerIs64bit perf -} -setup { - set tmpfile [file join [temporaryDirectory] io-2.5.tmp] -} -cleanup { - file delete $tmpfile } -body { - set fd [open $tmpfile wb] - puts -nonewline $fd [string repeat A 0x80000000] - close $fd - # TODO - Should really read it back in but large reads are not currently working! - file size $tmpfile -} -result 2147483648 + # Binary mode + testreadwrite 0x80000000 b +} -result 1 test io-2.6 {WriteBytes: large file (> UINT_MAX). Bug 3d01d51bc4} -constraints { pointerIs64bit perf -} -setup { - set tmpfile [file join [temporaryDirectory] io-2.6.tmp] -} -cleanup { - file delete $tmpfile } -body { - set fd [open $tmpfile wb] - puts -nonewline $fd [string repeat A 0x100000000] - close $fd - # TODO - Should really read it back in but large reads are not currently working! - file size $tmpfile -} -result 4294967296 + # Binary mode + testreadwrite 0x100000000 b -buffersize 1000000 +} -result 1 test io-2.7 {WriteBytes: large file (== UINT_MAX). Bug 90ff9b7f73} -constraints { pointerIs64bit perf -} -setup { - set tmpfile [file join [temporaryDirectory] io-2.7.tmp] -} -cleanup { - file delete $tmpfile } -body { - set fd [open $tmpfile wb] # *Exactly* UINT_MAX - separate bug from the general large file tests - puts -nonewline $fd [string repeat A 0xffffffff] - close $fd - # TODO - Should really read it back in but large reads are not currently working! - file size $tmpfile -} -result 4294967295 + testreadwrite 0xffffffff b +} -result 1 + test io-3.1 {WriteChars: compatibility with WriteBytes} { # loop until all bytes are written -- cgit v0.12 From 64e7f77040596a0716b5bd1f02942c8eb6124759 Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Wed, 1 Mar 2023 13:58:52 +0000 Subject: Disable file permission tests under WSL as WSL does not support Unix file attrs without special config --- tests/fCmd.test | 39 +++++++++++++++++++++------------------ 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/tests/fCmd.test b/tests/fCmd.test index 93793d1..02b70cb 100644 --- a/tests/fCmd.test +++ b/tests/fCmd.test @@ -42,6 +42,9 @@ if {[testConstraint win]} { } testConstraint notInCIenv [expr {![info exists ::env(CI)] || !$::env(CI)}] +# File permissions broken on wsl without some "exotic" wsl configuration +testConstraint notInWsl [expr {[llength [array names ::env *WSL*]] == 0}] + set tmpspace /tmp;# default value # Find a group that exists on this Unix system, or else skip tests that # require Unix groups. @@ -449,7 +452,7 @@ test fCmd-4.10 {TclFileMakeDirsCmd: exists, is dir} -setup { } -result {1 1} test fCmd-4.11 {TclFileMakeDirsCmd: doesn't exist: errno != ENOENT} -setup { cleanup -} -constraints {unix notRoot testchmod} -returnCodes error -body { +} -constraints {unix notRoot testchmod notInWsl} -returnCodes error -body { file mkdir td1/td2/td3 testchmod 0 td1/td2 file mkdir td1/td2/td3/td4 @@ -467,7 +470,7 @@ test fCmd-4.13 {TclFileMakeDirsCmd: doesn't exist: errno == ENOENT} -setup { test fCmd-4.14 {TclFileMakeDirsCmd: TclpCreateDirectory fails} -setup { cleanup file delete -force foo -} -constraints {unix notRoot} -body { +} -constraints {unix notRoot notInWsl} -body { file mkdir foo file attr foo -perm 0o40000 file mkdir foo/tf1 @@ -593,7 +596,7 @@ test fCmd-6.5 {CopyRenameOneFile: lstat(target) != 0} -setup { } -result {tf2} test fCmd-6.6 {CopyRenameOneFile: errno != ENOENT} -setup { cleanup -} -constraints {unix notRoot testchmod} -body { +} -constraints {unix notRoot testchmod notInWsl} -body { file mkdir td1 testchmod 0 td1 createfile tf1 @@ -712,7 +715,7 @@ test fCmd-6.22 {CopyRenameOneFile: copy/rename: !S_ISDIR(source)} -setup { } -result [file join $tmpspace tf1] test fCmd-6.23 {CopyRenameOneFile: TclpCopyDirectory failed} -setup { cleanup $tmpspace -} -constraints {xdev notRoot} -body { +} -constraints {xdev notRoot notInWsl} -body { file mkdir td1/td2/td3 file attributes td1 -permissions 0 file rename td1 $tmpspace @@ -764,7 +767,7 @@ test fCmd-6.27 {CopyRenameOneFile: TclpCopyDirectory failed} -setup { } -match glob -result {error renaming "td1" to "/tmp/tcl*/td1": file already exists} test fCmd-6.28 {CopyRenameOneFile: TclpCopyDirectory failed} -setup { cleanup $tmpspace -} -constraints {notRoot xdev} -body { +} -constraints {notRoot xdev notInWsl} -body { file mkdir td1/td2/td3 file attributes td1/td2/td3 -permissions 0 file rename td1 $tmpspace @@ -781,7 +784,7 @@ test fCmd-6.29 {CopyRenameOneFile: TclpCopyDirectory passed} -setup { } -result [file join $tmpspace td1 td2] test fCmd-6.30 {CopyRenameOneFile: TclpRemoveDirectory failed} -setup { cleanup $tmpspace -} -constraints {unix notRoot} -body { +} -constraints {unix notRoot notInWsl} -body { file mkdir foo/bar file attr foo -perm 0o40555 file rename foo/bar $tmpspace @@ -856,7 +859,7 @@ test fCmd-8.3 {file copy and path translation: ensure correct error} -body { test fCmd-9.1 {file rename: comprehensive: EACCES} -setup { cleanup -} -constraints {unix notRoot} -body { +} -constraints {unix notRoot notInWsl} -body { file mkdir td1 file mkdir td2 file attr td2 -perm 0o40000 @@ -882,7 +885,7 @@ test fCmd-9.3 {file rename: comprehensive: file to new name} -setup { } -result {{tf3 tf4} 1 0} test fCmd-9.4 {file rename: comprehensive: dir to new name} -setup { cleanup -} -constraints {unix notRoot testchmod notDarwin9} -body { +} -constraints {unix notRoot testchmod notDarwin9 notInWsl} -body { file mkdir td1 td2 testchmod 0o555 td2 file rename td1 td3 @@ -903,7 +906,7 @@ test fCmd-9.5 {file rename: comprehensive: file to self} -setup { } -result {tf1 tf2 1 0} test fCmd-9.6 {file rename: comprehensive: dir to self} -setup { cleanup -} -constraints {unix notRoot testchmod} -body { +} -constraints {unix notRoot testchmod notInWsl} -body { file mkdir td1 file mkdir td2 testchmod 0o555 td2 @@ -1097,7 +1100,7 @@ test fCmd-10.2 {file copy: comprehensive: file to new name} -setup { } -result {{tf1 tf2 tf3 tf4} tf1 tf2 1 0} test fCmd-10.3 {file copy: comprehensive: dir to new name} -setup { cleanup -} -constraints {unix notRoot testchmod} -body { +} -constraints {unix notRoot testchmod notInWsl} -body { file mkdir [file join td1 tdx] file mkdir [file join td2 tdy] testchmod 0o555 td2 @@ -1174,7 +1177,7 @@ test fCmd-10.5 {file copy: comprehensive: dir to empty dir} -setup { } -result [subst {{td1 td2 tdd1 tdd2 tdd3 tdd4 tds1 tds2 tds3 tds4} {1 {error copying "td1" to "[file join td2 td1]": file already exists}} {1 {error copying "tds1" to "[file join tdd1 tds1]": file already exists}} 1 1 1}] test fCmd-10.6 {file copy: comprehensive: dir to non-empty dir} -setup { cleanup -} -constraints {notRoot unixOrWin testchmod} -body { +} -constraints {notRoot unixOrWin testchmod notInWsl} -body { file mkdir tds1 file mkdir tds2 file mkdir [file join tdd1 tds1 xxx] @@ -1198,7 +1201,7 @@ test fCmd-10.7 {file rename: comprehensive: file to new name and dir} -setup { } -result [subst {{tf1 tf2} {[file join td1 tf3] [file join td1 tf4]} 1 0}] test fCmd-10.8 {file rename: comprehensive: dir to new name and dir} -setup { cleanup -} -constraints {unix notRoot testchmod} -body { +} -constraints {unix notRoot testchmod notInWsl} -body { file mkdir td1 file mkdir td2 file mkdir td3 @@ -1395,7 +1398,7 @@ test fCmd-12.7 {renamefile: renaming directory into offspring} -setup { } -result {1} test fCmd-12.8 {renamefile: generic error} -setup { catch {file delete -force -- tfa} -} -constraints {unix notRoot} -body { +} -constraints {unix notRoot notInWsl} -body { file mkdir tfa file mkdir tfa/dir file attributes tfa -permissions 0o555 @@ -1582,7 +1585,7 @@ test fCmd-14.7 {copyfile: copy directory succeeding} -setup { } -result {1 1} test fCmd-14.8 {copyfile: copy directory failing} -setup { catch {file delete -force -- tfa} -} -constraints {unix notRoot} -body { +} -constraints {unix notRoot notInWsl} -body { file mkdir tfa/dir/a/b/c file attributes tfa/dir -permissions 0 catch {file copy tfa tfa2} @@ -1727,7 +1730,7 @@ test fCmd-16.8 {remove a normal file} -constraints {notRoot} -setup { } -result {1} test fCmd-16.9 {error while deleting file} -setup { catch {file delete -force -- tfa} -} -constraints {unix notRoot} -body { +} -constraints {unix notRoot notInWsl} -body { file mkdir tfa createfile tfa/a file attributes tfa -permissions 0o555 @@ -1758,7 +1761,7 @@ test fCmd-16.11 {TclFileDeleteCmd: removing a nonexistant file} -setup { # More coverage tests for mkpath() test fCmd-17.1 {mkdir stat failing on target but not ENOENT} -setup { catch {file delete -force -- tfa1} -} -constraints {unix notRoot} -body { +} -constraints {unix notRoot notInWsl} -body { file mkdir tfa1 file attributes tfa1 -permissions 0o555 catch {file mkdir tfa1/tfa2} @@ -1968,7 +1971,7 @@ test fCmd-19.1 {remove empty directory} -constraints {notRoot} -setup { } -result {0} test fCmd-19.2 {rmdir error besides EEXIST} -setup { catch {file delete -force -- tfa} -} -constraints {unix notRoot} -body { +} -constraints {unix notRoot notInWsl} -body { file mkdir tfa file mkdir tfa/a file attributes tfa -permissions 0o555 @@ -1996,7 +1999,7 @@ test fCmd-19.3 {recursive remove} -constraints {notRoot} -setup { # test fCmd-20.1 {TraverseUnixTree : failure opening a subdirectory directory} -setup { catch {file delete -force -- tfa} -} -constraints {unix notRoot} -body { +} -constraints {unix notRoot notInWsl} -body { file mkdir tfa file mkdir tfa/a file attributes tfa/a -permissions 00000 -- cgit v0.12 From 5a3e0c869ebc400a9f268da243d855daff1bc632 Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Wed, 1 Mar 2023 14:55:13 +0000 Subject: Cherrypick [f1dd5f1cc7]: constrain tests not supported on WSL --- tests/fCmd.test | 39 +++++++++++++++++++++------------------ 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/tests/fCmd.test b/tests/fCmd.test index 8c9f799..e6fa893 100644 --- a/tests/fCmd.test +++ b/tests/fCmd.test @@ -42,6 +42,9 @@ if {[testConstraint win]} { } testConstraint notInCIenv [expr {![info exists ::env(CI)] || !$::env(CI)}] +# File permissions broken on wsl without some "exotic" wsl configuration +testConstraint notInWsl [expr {[llength [array names ::env *WSL*]] == 0}] + set tmpspace /tmp;# default value # Find a group that exists on this Unix system, or else skip tests that # require Unix groups. @@ -354,7 +357,7 @@ test fCmd-4.10 {TclFileMakeDirsCmd: exists, is dir} -setup { } -result {1 1} test fCmd-4.11 {TclFileMakeDirsCmd: doesn't exist: errno != ENOENT} -setup { cleanup -} -constraints {unix notRoot testchmod} -returnCodes error -body { +} -constraints {unix notRoot testchmod notInWsl} -returnCodes error -body { file mkdir td1/td2/td3 testchmod 0 td1/td2 file mkdir td1/td2/td3/td4 @@ -372,7 +375,7 @@ test fCmd-4.13 {TclFileMakeDirsCmd: doesn't exist: errno == ENOENT} -setup { test fCmd-4.14 {TclFileMakeDirsCmd: TclpCreateDirectory fails} -setup { cleanup file delete -force foo -} -constraints {unix notRoot} -body { +} -constraints {unix notRoot notInWsl} -body { file mkdir foo file attr foo -perm 0o40000 file mkdir foo/tf1 @@ -497,7 +500,7 @@ test fCmd-6.5 {CopyRenameOneFile: lstat(target) != 0} -setup { } -result {tf2} test fCmd-6.6 {CopyRenameOneFile: errno != ENOENT} -setup { cleanup -} -constraints {unix notRoot testchmod} -body { +} -constraints {unix notRoot testchmod notInWsl} -body { file mkdir td1 testchmod 0 td1 createfile tf1 @@ -616,7 +619,7 @@ test fCmd-6.22 {CopyRenameOneFile: copy/rename: !S_ISDIR(source)} -setup { } -result [file join $tmpspace tf1] test fCmd-6.23 {CopyRenameOneFile: TclpCopyDirectory failed} -setup { cleanup $tmpspace -} -constraints {xdev notRoot} -body { +} -constraints {xdev notRoot notInWsl} -body { file mkdir td1/td2/td3 file attributes td1 -permissions 0 file rename td1 $tmpspace @@ -668,7 +671,7 @@ test fCmd-6.27 {CopyRenameOneFile: TclpCopyDirectory failed} -setup { } -match glob -result {error renaming "td1" to "/tmp/tcl*/td1": file already exists} test fCmd-6.28 {CopyRenameOneFile: TclpCopyDirectory failed} -setup { cleanup $tmpspace -} -constraints {notRoot xdev} -body { +} -constraints {notRoot xdev notInWsl} -body { file mkdir td1/td2/td3 file attributes td1/td2/td3 -permissions 0 file rename td1 $tmpspace @@ -685,7 +688,7 @@ test fCmd-6.29 {CopyRenameOneFile: TclpCopyDirectory passed} -setup { } -result [file join $tmpspace td1 td2] test fCmd-6.30 {CopyRenameOneFile: TclpRemoveDirectory failed} -setup { cleanup $tmpspace -} -constraints {unix notRoot} -body { +} -constraints {unix notRoot notInWsl} -body { file mkdir foo/bar file attr foo -perm 0o40555 file rename foo/bar $tmpspace @@ -760,7 +763,7 @@ test fCmd-8.3 {file copy and path translation: ensure correct error} -body { test fCmd-9.1 {file rename: comprehensive: EACCES} -setup { cleanup -} -constraints {unix notRoot} -body { +} -constraints {unix notRoot notInWsl} -body { file mkdir td1 file mkdir td2 file attr td2 -perm 0o40000 @@ -786,7 +789,7 @@ test fCmd-9.3 {file rename: comprehensive: file to new name} -setup { } -result {{tf3 tf4} 1 0} test fCmd-9.4 {file rename: comprehensive: dir to new name} -setup { cleanup -} -constraints {unix notRoot testchmod notDarwin9} -body { +} -constraints {unix notRoot testchmod notDarwin9 notInWsl} -body { file mkdir td1 td2 testchmod 0o555 td2 file rename td1 td3 @@ -807,7 +810,7 @@ test fCmd-9.5 {file rename: comprehensive: file to self} -setup { } -result {tf1 tf2 1 0} test fCmd-9.6 {file rename: comprehensive: dir to self} -setup { cleanup -} -constraints {unix notRoot testchmod} -body { +} -constraints {unix notRoot testchmod notInWsl} -body { file mkdir td1 file mkdir td2 testchmod 0o555 td2 @@ -1001,7 +1004,7 @@ test fCmd-10.2 {file copy: comprehensive: file to new name} -setup { } -result {{tf1 tf2 tf3 tf4} tf1 tf2 1 0} test fCmd-10.3 {file copy: comprehensive: dir to new name} -setup { cleanup -} -constraints {unix notRoot testchmod} -body { +} -constraints {unix notRoot testchmod notInWsl} -body { file mkdir [file join td1 tdx] file mkdir [file join td2 tdy] testchmod 0o555 td2 @@ -1078,7 +1081,7 @@ test fCmd-10.5 {file copy: comprehensive: dir to empty dir} -setup { } -result [subst {{td1 td2 tdd1 tdd2 tdd3 tdd4 tds1 tds2 tds3 tds4} {1 {error copying "td1" to "[file join td2 td1]": file already exists}} {1 {error copying "tds1" to "[file join tdd1 tds1]": file already exists}} 1 1 1}] test fCmd-10.6 {file copy: comprehensive: dir to non-empty dir} -setup { cleanup -} -constraints {notRoot unixOrWin testchmod} -body { +} -constraints {notRoot unixOrWin testchmod notInWsl} -body { file mkdir tds1 file mkdir tds2 file mkdir [file join tdd1 tds1 xxx] @@ -1102,7 +1105,7 @@ test fCmd-10.7 {file rename: comprehensive: file to new name and dir} -setup { } -result [subst {{tf1 tf2} {[file join td1 tf3] [file join td1 tf4]} 1 0}] test fCmd-10.8 {file rename: comprehensive: dir to new name and dir} -setup { cleanup -} -constraints {unix notRoot testchmod} -body { +} -constraints {unix notRoot testchmod notInWsl} -body { file mkdir td1 file mkdir td2 file mkdir td3 @@ -1299,7 +1302,7 @@ test fCmd-12.7 {renamefile: renaming directory into offspring} -setup { } -result {1} test fCmd-12.8 {renamefile: generic error} -setup { catch {file delete -force -- tfa} -} -constraints {unix notRoot} -body { +} -constraints {unix notRoot notInWsl} -body { file mkdir tfa file mkdir tfa/dir file attributes tfa -permissions 0o555 @@ -1486,7 +1489,7 @@ test fCmd-14.7 {copyfile: copy directory succeeding} -setup { } -result {1 1} test fCmd-14.8 {copyfile: copy directory failing} -setup { catch {file delete -force -- tfa} -} -constraints {unix notRoot} -body { +} -constraints {unix notRoot notInWsl} -body { file mkdir tfa/dir/a/b/c file attributes tfa/dir -permissions 0 catch {file copy tfa tfa2} @@ -1628,7 +1631,7 @@ test fCmd-16.8 {remove a normal file} -constraints {notRoot} -setup { } -result {1} test fCmd-16.9 {error while deleting file} -setup { catch {file delete -force -- tfa} -} -constraints {unix notRoot} -body { +} -constraints {unix notRoot notInWsl} -body { file mkdir tfa createfile tfa/a file attributes tfa -permissions 0o555 @@ -1659,7 +1662,7 @@ test fCmd-16.11 {TclFileDeleteCmd: removing a nonexistant file} -setup { # More coverage tests for mkpath() test fCmd-17.1 {mkdir stat failing on target but not ENOENT} -setup { catch {file delete -force -- tfa1} -} -constraints {unix notRoot} -body { +} -constraints {unix notRoot notInWsl} -body { file mkdir tfa1 file attributes tfa1 -permissions 0o555 catch {file mkdir tfa1/tfa2} @@ -1869,7 +1872,7 @@ test fCmd-19.1 {remove empty directory} -constraints {notRoot} -setup { } -result {0} test fCmd-19.2 {rmdir error besides EEXIST} -setup { catch {file delete -force -- tfa} -} -constraints {unix notRoot} -body { +} -constraints {unix notRoot notInWsl} -body { file mkdir tfa file mkdir tfa/a file attributes tfa -permissions 0o555 @@ -1897,7 +1900,7 @@ test fCmd-19.3 {recursive remove} -constraints {notRoot} -setup { # test fCmd-20.1 {TraverseUnixTree : failure opening a subdirectory directory} -setup { catch {file delete -force -- tfa} -} -constraints {unix notRoot} -body { +} -constraints {unix notRoot notInWsl} -body { file mkdir tfa file mkdir tfa/a file attributes tfa/a -permissions 00000 -- cgit v0.12 From a0beeb8a76662930610855307fa41a53956e0543 Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Wed, 1 Mar 2023 15:39:06 +0000 Subject: Fix Tcl_UtfToExternalDStringEx call in macos code --- macosx/tclMacOSXFCmd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/macosx/tclMacOSXFCmd.c b/macosx/tclMacOSXFCmd.c index 7bdc72a..e24c555 100644 --- a/macosx/tclMacOSXFCmd.c +++ b/macosx/tclMacOSXFCmd.c @@ -643,7 +643,7 @@ SetOSTypeFromAny( size_t length; string = Tcl_GetStringFromObj(objPtr, &length); - Tcl_UtfToExternalDStringEx(encoding, string, length, TCL_ENCODING_NOCOMPLAIN, &ds); + Tcl_UtfToExternalDStringEx(NULL, encoding, string, length, TCL_ENCODING_PROFILE_TCL8, &ds, NULL); if (Tcl_DStringLength(&ds) > 4) { if (interp) { -- cgit v0.12 From b2cdedcec2bbb94929cef675635c5864db8db8de Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Thu, 2 Mar 2023 04:16:44 +0000 Subject: Eliminate TCL_ENCODING_MODIFIED flag --- generic/tcl.h | 13 +++++++------ generic/tclEncoding.c | 33 +++++++++++++++++++-------------- 2 files changed, 26 insertions(+), 20 deletions(-) diff --git a/generic/tcl.h b/generic/tcl.h index 3fc53db..a92680d 100644 --- a/generic/tcl.h +++ b/generic/tcl.h @@ -2123,12 +2123,12 @@ typedef struct Tcl_EncodingType { * content. Otherwise, the number of chars * produced is controlled only by other limiting * factors. - * TCL_ENCODING_MODIFIED - Convert NULL bytes to \xC0\x80 in stead of - * 0x00. Only valid for "utf-8" and "cesu-8". - * This flag is implicit for external -> internal conversions, - * optional for internal -> external conversions. * TCL_ENCODING_PROFILE_* - Mutually exclusive encoding profile ids. Note * these are bit masks. + * + * NOTE: THESE BIT DEFINITIONS SHOULD NOT OVERLAP WITH INTERNAL USE BITS + * DEFINED IN tclEncoding.c (ENCODING_INPUT et al). Be cognizant of this + * when adding bits. */ #define TCL_ENCODING_START 0x01 @@ -2136,8 +2136,9 @@ typedef struct Tcl_EncodingType { #define TCL_ENCODING_STOPONERROR 0x04 #define TCL_ENCODING_NO_TERMINATE 0x08 #define TCL_ENCODING_CHAR_LIMIT 0x10 -#define TCL_ENCODING_MODIFIED 0x20 -/* Reserve top byte for profile values (disjoint) */ +/* Internal use bits, do not define bits in this space. See above comment */ +#define TCL_ENCODING_INTERNAL_USE_MASK 0xFF00 +/* Reserve top byte for profile values (disjoint, not a mask) */ #define TCL_ENCODING_PROFILE_TCL8 0x01000000 #define TCL_ENCODING_PROFILE_STRICT 0x02000000 #define TCL_ENCODING_PROFILE_REPLACE 0x03000000 diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 05d231f..1d336f5 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -548,11 +548,16 @@ FillEncodingFileMap(void) *--------------------------------------------------------------------------- */ -/* Since TCL_ENCODING_MODIFIED is only used for utf-8/cesu-8 and - * TCL_ENCODING_LE is only used for utf-16/utf-32/ucs-2. re-use the same value */ -#define TCL_ENCODING_LE TCL_ENCODING_MODIFIED /* Little-endian encoding */ +/* + * NOTE: THESE BIT DEFINITIONS SHOULD NOT OVERLAP WITH INTERNAL USE BITS + * DEFINED IN tcl.h (TCL_ENCODING_* et al). Be cognizant of this + * when adding bits. TODO - should really be defined in a single file. + * + * To prevent conflicting bits, only define bits within 0xff00 mask here. + */ +#define TCL_ENCODING_LE 0x100 /* Used to distinguish LE/BE variants */ #define ENCODING_UTF 0x200 /* For UTF-8 encoding, allow 4-byte output sequences */ -#define ENCODING_INPUT 0x400 /* For UTF-8/CESU-8 encoding, means external -> internal */ +#define ENCODING_INPUT 0x400 /* For UTF-8/CESU-8 encoding, means external -> internal */ void TclInitEncodingSubsystem(void) @@ -565,12 +570,16 @@ TclInitEncodingSubsystem(void) char c; short s; } isLe; + int leFlags; if (encodingsInitialized) { return; } - isLe.s = TCL_ENCODING_LE; + /* Note: This DEPENDS on TCL_ENCODING_LE being defined in least sig byte */ + isLe.s = 1; + leFlags = isLe.c ? TCL_ENCODING_LE : 0; + Tcl_MutexLock(&encodingMutex); Tcl_InitHashTable(&encodingTable, TCL_STRING_KEYS); Tcl_MutexUnlock(&encodingMutex); @@ -611,7 +620,7 @@ TclInitEncodingSubsystem(void) type.clientData = INT2PTR(0); Tcl_CreateEncoding(&type); type.encodingName = "ucs-2"; - type.clientData = INT2PTR(isLe.c); + type.clientData = INT2PTR(leFlags); Tcl_CreateEncoding(&type); type.toUtfProc = Utf32ToUtfProc; @@ -625,7 +634,7 @@ TclInitEncodingSubsystem(void) type.clientData = INT2PTR(0); Tcl_CreateEncoding(&type); type.encodingName = "utf-32"; - type.clientData = INT2PTR(isLe.c); + type.clientData = INT2PTR(leFlags); Tcl_CreateEncoding(&type); type.toUtfProc = Utf16ToUtfProc; @@ -639,7 +648,7 @@ TclInitEncodingSubsystem(void) type.clientData = INT2PTR(ENCODING_UTF); Tcl_CreateEncoding(&type); type.encodingName = "utf-16"; - type.clientData = INT2PTR(isLe.c|ENCODING_UTF); + type.clientData = INT2PTR(leFlags|ENCODING_UTF); Tcl_CreateEncoding(&type); #ifndef TCL_NO_DEPRECATED @@ -1222,8 +1231,6 @@ Tcl_ExternalToUtfDString( * - *At most one* of TCL_ENCODING_PROFILE{DEFAULT,TCL8,STRICT} * - TCL_ENCODING_STOPONERROR: Backward compatibility. Sets the profile * to TCL_ENCODING_PROFILE_STRICT overriding any specified profile flags - * - TCL_ENCODING_MODIFIED: enable Tcl internal conversion mapping \xC0\x80 - * to 0x00. Only valid for "utf-8" and "cesu-8". * Any other flag bits will cause an error to be returned (for future * compatibility) * @@ -1518,8 +1525,6 @@ Tcl_UtfToExternalDString( * - *At most one* of TCL_ENCODING_PROFILE{DEFAULT,TCL8,STRICT} * - TCL_ENCODING_STOPONERROR: Backward compatibility. Sets the profile * to TCL_ENCODING_PROFILE_STRICT overriding any specified profile flags - * - TCL_ENCODING_MODIFIED: convert NULL bytes to \xC0\x80 instead - * of 0x00. Only valid for "utf-8" and "cesu-8". * * Results: * The return value is one of @@ -2466,7 +2471,7 @@ BinaryProc( static int UtfToUtfProc( - void *clientData, /* additional flags, e.g. TCL_ENCODING_MODIFIED */ + void *clientData, /* additional flags */ const char *src, /* Source string in UTF-8. */ int srcLen, /* Source string length in bytes. */ int flags, /* TCL_ENCODING_* conversion control flags. */ @@ -2536,7 +2541,7 @@ UtfToUtfProc( *dst++ = *src++; } else if ((UCHAR(*src) == 0xC0) && (src + 1 < srcEnd) && - (UCHAR(src[1]) == 0x80) && !(flags & TCL_ENCODING_MODIFIED) && + (UCHAR(src[1]) == 0x80) && (!(flags & ENCODING_INPUT) || PROFILE_STRICT(profile) || PROFILE_REPLACE(profile))) { /* Special sequence \xC0\x80 */ -- cgit v0.12 From f73632129523db232ea09ddd6b9c1797418b8af7 Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Thu, 2 Mar 2023 04:35:44 +0000 Subject: Cherrypick [13537afd1b] - eliminate TCL_ENCODING_MODIFIED --- generic/tcl.h | 13 +++++++------ generic/tclEncoding.c | 33 +++++++++++++++++++-------------- 2 files changed, 26 insertions(+), 20 deletions(-) diff --git a/generic/tcl.h b/generic/tcl.h index 2d2849f..12ce4ca 100644 --- a/generic/tcl.h +++ b/generic/tcl.h @@ -1944,12 +1944,12 @@ typedef struct Tcl_EncodingType { * content. Otherwise, the number of chars * produced is controlled only by other limiting * factors. - * TCL_ENCODING_MODIFIED - Convert NULL bytes to \xC0\x80 in stead of - * 0x00. Only valid for "utf-8" and "cesu-8". - * This flag is implicit for external -> internal conversions, - * optional for internal -> external conversions. * TCL_ENCODING_PROFILE_* - Mutually exclusive encoding profile ids. Note * these are bit masks. + * + * NOTE: THESE BIT DEFINITIONS SHOULD NOT OVERLAP WITH INTERNAL USE BITS + * DEFINED IN tclEncoding.c (ENCODING_INPUT et al). Be cognizant of this + * when adding bits. */ #define TCL_ENCODING_START 0x01 @@ -1963,8 +1963,9 @@ typedef struct Tcl_EncodingType { #endif #define TCL_ENCODING_NO_TERMINATE 0x08 #define TCL_ENCODING_CHAR_LIMIT 0x10 -#define TCL_ENCODING_MODIFIED 0x20 -/* Reserve top byte for profile values (disjoint) */ +/* Internal use bits, do not define bits in this space. See above comment */ +#define TCL_ENCODING_INTERNAL_USE_MASK 0xFF00 +/* Reserve top byte for profile values (disjoint, not a mask) */ #define TCL_ENCODING_PROFILE_TCL8 0x01000000 #define TCL_ENCODING_PROFILE_STRICT 0x02000000 #define TCL_ENCODING_PROFILE_REPLACE 0x03000000 diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index b2b319d..264ca96 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -549,11 +549,16 @@ FillEncodingFileMap(void) *--------------------------------------------------------------------------- */ -/* Since TCL_ENCODING_MODIFIED is only used for utf-8/cesu-8 and - * TCL_ENCODING_LE is only used for utf-16/utf-32/ucs-2. re-use the same value */ -#define TCL_ENCODING_LE TCL_ENCODING_MODIFIED /* Little-endian encoding */ +/* + * NOTE: THESE BIT DEFINITIONS SHOULD NOT OVERLAP WITH INTERNAL USE BITS + * DEFINED IN tcl.h (TCL_ENCODING_* et al). Be cognizant of this + * when adding bits. TODO - should really be defined in a single file. + * + * To prevent conflicting bits, only define bits within 0xff00 mask here. + */ +#define TCL_ENCODING_LE 0x100 /* Used to distinguish LE/BE variants */ #define ENCODING_UTF 0x200 /* For UTF-8 encoding, allow 4-byte output sequences */ -#define ENCODING_INPUT 0x400 /* For UTF-8/CESU-8 encoding, means external -> internal */ +#define ENCODING_INPUT 0x400 /* For UTF-8/CESU-8 encoding, means external -> internal */ void TclInitEncodingSubsystem(void) @@ -566,12 +571,16 @@ TclInitEncodingSubsystem(void) char c; short s; } isLe; + int leFlags; if (encodingsInitialized) { return; } - isLe.s = TCL_ENCODING_LE; + /* Note: This DEPENDS on TCL_ENCODING_LE being defined in least sig byte */ + isLe.s = 1; + leFlags = isLe.c ? TCL_ENCODING_LE : 0; + Tcl_MutexLock(&encodingMutex); Tcl_InitHashTable(&encodingTable, TCL_STRING_KEYS); Tcl_MutexUnlock(&encodingMutex); @@ -612,7 +621,7 @@ TclInitEncodingSubsystem(void) type.clientData = INT2PTR(0); Tcl_CreateEncoding(&type); type.encodingName = "ucs-2"; - type.clientData = INT2PTR(isLe.c); + type.clientData = INT2PTR(leFlags); Tcl_CreateEncoding(&type); type.toUtfProc = Utf32ToUtfProc; @@ -626,7 +635,7 @@ TclInitEncodingSubsystem(void) type.clientData = INT2PTR(0); Tcl_CreateEncoding(&type); type.encodingName = "utf-32"; - type.clientData = INT2PTR(isLe.c); + type.clientData = INT2PTR(leFlags); Tcl_CreateEncoding(&type); type.toUtfProc = Utf16ToUtfProc; @@ -640,7 +649,7 @@ TclInitEncodingSubsystem(void) type.clientData = INT2PTR(ENCODING_UTF); Tcl_CreateEncoding(&type); type.encodingName = "utf-16"; - type.clientData = INT2PTR(isLe.c|ENCODING_UTF); + type.clientData = INT2PTR(leFlags|ENCODING_UTF); Tcl_CreateEncoding(&type); #ifndef TCL_NO_DEPRECATED @@ -1160,8 +1169,6 @@ Tcl_ExternalToUtfDString( * - *At most one* of TCL_ENCODING_PROFILE{DEFAULT,TCL8,STRICT} * - TCL_ENCODING_STOPONERROR: Backward compatibility. Sets the profile * to TCL_ENCODING_PROFILE_STRICT overriding any specified profile flags - * - TCL_ENCODING_MODIFIED: enable Tcl internal conversion mapping \xC0\x80 - * to 0x00. Only valid for "utf-8" and "cesu-8". * Any other flag bits will cause an error to be returned (for future * compatibility) * @@ -1484,8 +1491,6 @@ Tcl_UtfToExternalDString( * - *At most one* of TCL_ENCODING_PROFILE{DEFAULT,TCL8,STRICT} * - TCL_ENCODING_STOPONERROR: Backward compatibility. Sets the profile * to TCL_ENCODING_PROFILE_STRICT overriding any specified profile flags - * - TCL_ENCODING_MODIFIED: convert NULL bytes to \xC0\x80 instead - * of 0x00. Only valid for "utf-8" and "cesu-8". * * Results: * The return value is one of @@ -2462,7 +2467,7 @@ BinaryProc( static int UtfToUtfProc( - void *clientData, /* additional flags, e.g. TCL_ENCODING_MODIFIED */ + void *clientData, /* additional flags */ const char *src, /* Source string in UTF-8. */ int srcLen, /* Source string length in bytes. */ int flags, /* TCL_ENCODING_* conversion control flags. */ @@ -2531,7 +2536,7 @@ UtfToUtfProc( *dst++ = *src++; } else if ((UCHAR(*src) == 0xC0) && (src + 1 < srcEnd) && - (UCHAR(src[1]) == 0x80) && !(flags & TCL_ENCODING_MODIFIED) && + (UCHAR(src[1]) == 0x80) && (!(flags & ENCODING_INPUT) || PROFILE_STRICT(profile) || PROFILE_REPLACE(profile))) { /* Special sequence \xC0\x80 */ -- cgit v0.12 From 806babb9326b7729d3c104f8167662d45d0e5eaf Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Thu, 2 Mar 2023 04:48:00 +0000 Subject: Add encoding test vector files --- tests/cmdAH.test | 5 +- tests/encodingVectors.tcl | 655 ++++++++++++++++ tests/icuUcmTests.tcl | 1891 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 2547 insertions(+), 4 deletions(-) create mode 100644 tests/encodingVectors.tcl create mode 100644 tests/icuUcmTests.tcl diff --git a/tests/cmdAH.test b/tests/cmdAH.test index 5a48453..634c3c4 100644 --- a/tests/cmdAH.test +++ b/tests/cmdAH.test @@ -187,10 +187,7 @@ set "numargErrors(encoding convertto)" {wrong # args: should be "(encoding |::tc set "numargErrors(encoding names)" {wrong # args: should be "encoding names"} set "numargErrors(encoding profiles)" {wrong # args: should be "encoding profiles"} -# TODO - encodingVectors not currently in tcl9 branch -if {[file exists [file join [file dirname [info script]] encodingVectors.tcl]]} { - source [file join [file dirname [info script]] encodingVectors.tcl] -} +source [file join [file dirname [info script]] encodingVectors.tcl] # Maps utf-{16,32}{le,be} to utf-16, utf-32 and diff --git a/tests/encodingVectors.tcl b/tests/encodingVectors.tcl new file mode 100644 index 0000000..b3f3efa --- /dev/null +++ b/tests/encodingVectors.tcl @@ -0,0 +1,655 @@ +# This file contains test vectors for verifying various encodings. They are +# stored in a common file so that they can be sourced into the various test +# modules that are dependent on encodings. This file contains statically defined +# test vectors. In addition, it sources the ICU-generated test vectors from +# icuUcmTests.tcl. +# +# Note that sourcing the file will reinitialize any existing encoding test +# vectors. +# + +# List of defined encoding profiles +set encProfiles {tcl8 strict replace} +set encDefaultProfile tcl8; # Should reflect the default from implementation + +# encValidStrings - Table of valid strings. +# +# Each row is +# The pair should be unique for generated test ids to be unique. +# STR is a string that can be encoded in the encoding ENCODING resulting +# in the byte sequence BYTES. The CTRL field is a list that controls test +# generation. It may contain zero or more of `solo`, `lead`, `tail` and +# `middle` indicating that the generated tests should include the string +# by itself, as the lead of a longer string, as the tail of a longer string +# and in the middle of a longer string. If CTRL is empty, it is treated as +# containing all four of the above. The CTRL field may also contain the +# words knownBug or knownW3C which will cause the test generation for that +# vector to be skipped. +# +# utf-16, utf-32 missing because they are automatically +# generated based on le/be versions. +set encValidStrings {}; # Reset the table + +lappend encValidStrings {*}{ + ascii \u0000 00 {} {Lowest ASCII} + ascii \u007F 7F knownBug {Highest ASCII} + ascii \u007D 7D {} {Brace - just to verify test scripts are escaped correctly} + ascii \u007B 7B {} {Terminating brace - just to verify test scripts are escaped correctly} + + utf-8 \u0000 00 {} {Unicode Table 3.7 Row 1} + utf-8 \u007F 7F {} {Unicode Table 3.7 Row 1} + utf-8 \u0080 C280 {} {Unicode Table 3.7 Row 2} + utf-8 \u07FF DFBF {} {Unicode Table 3.7 Row 2} + utf-8 \u0800 E0A080 {} {Unicode Table 3.7 Row 3} + utf-8 \u0FFF E0BFBF {} {Unicode Table 3.7 Row 3} + utf-8 \u1000 E18080 {} {Unicode Table 3.7 Row 4} + utf-8 \uCFFF ECBFBF {} {Unicode Table 3.7 Row 4} + utf-8 \uD000 ED8080 {} {Unicode Table 3.7 Row 5} + utf-8 \uD7FF ED9FBF {} {Unicode Table 3.7 Row 5} + utf-8 \uE000 EE8080 {} {Unicode Table 3.7 Row 6} + utf-8 \uFFFF EFBFBF {} {Unicode Table 3.7 Row 6} + utf-8 \U10000 F0908080 {} {Unicode Table 3.7 Row 7} + utf-8 \U3FFFF F0BFBFBF {} {Unicode Table 3.7 Row 7} + utf-8 \U40000 F1808080 {} {Unicode Table 3.7 Row 8} + utf-8 \UFFFFF F3BFBFBF {} {Unicode Table 3.7 Row 8} + utf-8 \U100000 F4808080 {} {Unicode Table 3.7 Row 9} + utf-8 \U10FFFF F48FBFBF {} {Unicode Table 3.7 Row 9} + utf-8 A\u03A9\u8A9E\U00010384 41CEA9E8AA9EF0908E84 {} {Unicode 2.5} + + utf-16le \u0000 0000 {} {Lowest code unit} + utf-16le \uD7FF FFD7 {} {Below high surrogate range} + utf-16le \uE000 00E0 {} {Above low surrogate range} + utf-16le \uFFFF FFFF {} {Highest code unit} + utf-16le \U010000 00D800DC {} {First surrogate pair} + utf-16le \U10FFFF FFDBFFDF {} {First surrogate pair} + utf-16le A\u03A9\u8A9E\U00010384 4100A9039E8A00D884DF {} {Unicode 2.5} + + utf-16be \u0000 0000 {} {Lowest code unit} + utf-16be \uD7FF D7FF {} {Below high surrogate range} + utf-16be \uE000 E000 {} {Above low surrogate range} + utf-16be \uFFFF FFFF {} {Highest code unit} + utf-16be \U010000 D800DC00 {} {First surrogate pair} + utf-16be \U10FFFF DBFFDFFF {} {First surrogate pair} + utf-16be A\u03A9\u8A9E\U00010384 004103A98A9ED800DF84 {} {Unicode 2.5} + + utf-32le \u0000 00000000 {} {Lowest code unit} + utf-32le \uFFFF FFFF0000 {} {Highest BMP} + utf-32le \U010000 00000100 {} {First supplementary} + utf-32le \U10FFFF ffff1000 {} {Last supplementary} + utf-32le A\u03A9\u8A9E\U00010384 41000000A90300009E8A000084030100 {} {Unicode 2.5} + + utf-32be \u0000 00000000 {} {Lowest code unit} + utf-32be \uFFFF 0000FFFF {} {Highest BMP} + utf-32be \U010000 00010000 {} {First supplementary} + utf-32be \U10FFFF 0010FFFF {} {Last supplementary} + utf-32be A\u03A9\u8A9E\U00010384 00000041000003A900008A9E00010384 {} {Unicode 2.5} +} + +# encInvalidBytes - Table of invalid byte sequences +# These are byte sequences that should appear for an encoding. Each row is +# of the form +# +# The triple should be unique for test ids to be +# unique. BYTES is a byte sequence that is invalid. EXPECTEDRESULT is the +# expected string when the bytes are decoded using the PROFILE profile. +# FAILINDEX gives the expected index of the invalid byte under that profile. The +# CTRL field is a list that controls test generation. It may contain zero or +# more of `solo`, `lead`, `tail` and `middle` indicating that the generated the +# tail of a longer and in the middle of a longer string. If empty, it is treated +# as containing all four of the above. The CTRL field may also contain the words +# knownBug or knownW3C which will cause the test generation for that vector to +# be skipped. +# +# utf-32 missing because they are automatically generated based on le/be +# versions. +set encInvalidBytes {}; # Reset the table + +# ascii - Any byte above 127 is invalid and is mapped +# to the same numeric code point except for the range +# 80-9F which is treated as cp1252. +# This tests the TableToUtfProc code path. +lappend encInvalidBytes {*}{ + ascii 80 tcl8 \u20AC -1 {knownBug} {map to cp1252} + ascii 80 replace \uFFFD -1 {} {Smallest invalid byte} + ascii 80 strict {} 0 {} {Smallest invalid byte} + ascii 81 tcl8 \u0081 -1 {knownBug} {map to cp1252} + ascii 82 tcl8 \u201A -1 {knownBug} {map to cp1252} + ascii 83 tcl8 \u0192 -1 {knownBug} {map to cp1252} + ascii 84 tcl8 \u201E -1 {knownBug} {map to cp1252} + ascii 85 tcl8 \u2026 -1 {knownBug} {map to cp1252} + ascii 86 tcl8 \u2020 -1 {knownBug} {map to cp1252} + ascii 87 tcl8 \u2021 -1 {knownBug} {map to cp1252} + ascii 88 tcl8 \u0276 -1 {knownBug} {map to cp1252} + ascii 89 tcl8 \u2030 -1 {knownBug} {map to cp1252} + ascii 8A tcl8 \u0160 -1 {knownBug} {map to cp1252} + ascii 8B tcl8 \u2039 -1 {knownBug} {map to cp1252} + ascii 8C tcl8 \u0152 -1 {knownBug} {map to cp1252} + ascii 8D tcl8 \u008D -1 {knownBug} {map to cp1252} + ascii 8E tcl8 \u017D -1 {knownBug} {map to cp1252} + ascii 8F tcl8 \u008F -1 {knownBug} {map to cp1252} + ascii 90 tcl8 \u0090 -1 {knownBug} {map to cp1252} + ascii 91 tcl8 \u2018 -1 {knownBug} {map to cp1252} + ascii 92 tcl8 \u2019 -1 {knownBug} {map to cp1252} + ascii 93 tcl8 \u201C -1 {knownBug} {map to cp1252} + ascii 94 tcl8 \u201D -1 {knownBug} {map to cp1252} + ascii 95 tcl8 \u2022 -1 {knownBug} {map to cp1252} + ascii 96 tcl8 \u2013 -1 {knownBug} {map to cp1252} + ascii 97 tcl8 \u2014 -1 {knownBug} {map to cp1252} + ascii 98 tcl8 \u02DC -1 {knownBug} {map to cp1252} + ascii 99 tcl8 \u2122 -1 {knownBug} {map to cp1252} + ascii 9A tcl8 \u0161 -1 {knownBug} {map to cp1252} + ascii 9B tcl8 \u203A -1 {knownBug} {map to cp1252} + ascii 9C tcl8 \u0153 -1 {knownBug} {map to cp1252} + ascii 9D tcl8 \u009D -1 {knownBug} {map to cp1252} + ascii 9E tcl8 \u017E -1 {knownBug} {map to cp1252} + ascii 9F tcl8 \u0178 -1 {knownBug} {map to cp1252} + + ascii FF tcl8 \u00FF -1 {} {Largest invalid byte} + ascii FF replace \uFFFD -1 {} {Largest invalid byte} + ascii FF strict {} 0 {} {Largest invalid byte} +} + +# utf-8 - valid sequences based on Table 3.7 in the Unicode +# standard. +# +# Code Points First Second Third Fourth Byte +# U+0000..U+007F 00..7F +# U+0080..U+07FF C2..DF 80..BF +# U+0800..U+0FFF E0 A0..BF 80..BF +# U+1000..U+CFFF E1..EC 80..BF 80..BF +# U+D000..U+D7FF ED 80..9F 80..BF +# U+E000..U+FFFF EE..EF 80..BF 80..BF +# U+10000..U+3FFFF F0 90..BF 80..BF 80..BF +# U+40000..U+FFFFF F1..F3 80..BF 80..BF 80..BF +# U+100000..U+10FFFF F4 80..8F 80..BF 80..BF +# +# Tests below are based on the "gaps" in the above table. Note ascii test +# values are repeated because internally a different code path is used +# (UtfToUtfProc). +# Note C0, C1, F5:FF are invalid bytes ANYWHERE. Exception is C080 +lappend encInvalidBytes {*}{ + utf-8 80 tcl8 \u20AC -1 {} {map to cp1252} + utf-8 80 replace \uFFFD -1 {} {Smallest invalid byte} + utf-8 80 strict {} 0 {} {Smallest invalid byte} + utf-8 81 tcl8 \u0081 -1 {} {map to cp1252} + utf-8 82 tcl8 \u201A -1 {} {map to cp1252} + utf-8 83 tcl8 \u0192 -1 {} {map to cp1252} + utf-8 84 tcl8 \u201E -1 {} {map to cp1252} + utf-8 85 tcl8 \u2026 -1 {} {map to cp1252} + utf-8 86 tcl8 \u2020 -1 {} {map to cp1252} + utf-8 87 tcl8 \u2021 -1 {} {map to cp1252} + utf-8 88 tcl8 \u02C6 -1 {} {map to cp1252} + utf-8 89 tcl8 \u2030 -1 {} {map to cp1252} + utf-8 8A tcl8 \u0160 -1 {} {map to cp1252} + utf-8 8B tcl8 \u2039 -1 {} {map to cp1252} + utf-8 8C tcl8 \u0152 -1 {} {map to cp1252} + utf-8 8D tcl8 \u008D -1 {} {map to cp1252} + utf-8 8E tcl8 \u017D -1 {} {map to cp1252} + utf-8 8F tcl8 \u008F -1 {} {map to cp1252} + utf-8 90 tcl8 \u0090 -1 {} {map to cp1252} + utf-8 91 tcl8 \u2018 -1 {} {map to cp1252} + utf-8 92 tcl8 \u2019 -1 {} {map to cp1252} + utf-8 93 tcl8 \u201C -1 {} {map to cp1252} + utf-8 94 tcl8 \u201D -1 {} {map to cp1252} + utf-8 95 tcl8 \u2022 -1 {} {map to cp1252} + utf-8 96 tcl8 \u2013 -1 {} {map to cp1252} + utf-8 97 tcl8 \u2014 -1 {} {map to cp1252} + utf-8 98 tcl8 \u02DC -1 {} {map to cp1252} + utf-8 99 tcl8 \u2122 -1 {} {map to cp1252} + utf-8 9A tcl8 \u0161 -1 {} {map to cp1252} + utf-8 9B tcl8 \u203A -1 {} {map to cp1252} + utf-8 9C tcl8 \u0153 -1 {} {map to cp1252} + utf-8 9D tcl8 \u009D -1 {} {map to cp1252} + utf-8 9E tcl8 \u017E -1 {} {map to cp1252} + utf-8 9F tcl8 \u0178 -1 {} {map to cp1252} + + utf-8 C0 tcl8 \u00C0 -1 {} {C0 is invalid anywhere} + utf-8 C0 strict {} 0 {} {C0 is invalid anywhere} + utf-8 C0 replace \uFFFD -1 {} {C0 is invalid anywhere} + utf-8 C080 tcl8 \u0000 -1 {} {C080 -> U+0 in Tcl's internal modified UTF8} + utf-8 C080 strict {} 0 {} {C080 -> invalid} + utf-8 C080 replace \uFFFD -1 {} {C080 -> single replacement char} + utf-8 C0A2 tcl8 \u00C0\u00A2 -1 {} {websec.github.io - A} + utf-8 C0A2 replace \uFFFD\uFFFD -1 {} {websec.github.io - A} + utf-8 C0A2 strict {} 0 {} {websec.github.io - A} + utf-8 C0A7 tcl8 \u00C0\u00A7 -1 {} {websec.github.io - double quote} + utf-8 C0A7 replace \uFFFD\uFFFD -1 {} {websec.github.io - double quote} + utf-8 C0A7 strict {} 0 {} {websec.github.io - double quote} + utf-8 C0AE tcl8 \u00C0\u00AE -1 {} {websec.github.io - full stop} + utf-8 C0AE replace \uFFFD\uFFFD -1 {} {websec.github.io - full stop} + utf-8 C0AE strict {} 0 {} {websec.github.io - full stop} + utf-8 C0AF tcl8 \u00C0\u00AF -1 {} {websec.github.io - solidus} + utf-8 C0AF replace \uFFFD\uFFFD -1 {} {websec.github.io - solidus} + utf-8 C0AF strict {} 0 {} {websec.github.io - solidus} + + utf-8 C1 tcl8 \u00C1 -1 {} {C1 is invalid everywhere} + utf-8 C1 replace \uFFFD -1 {} {C1 is invalid everywhere} + utf-8 C1 strict {} 0 {} {C1 is invalid everywhere} + utf-8 C181 tcl8 \u00C1\u0081 -1 {} {websec.github.io - base test (A)} + utf-8 C181 replace \uFFFD\uFFFD -1 {} {websec.github.io - base test (A)} + utf-8 C181 strict {} 0 {} {websec.github.io - base test (A)} + utf-8 C19C tcl8 \u00C1\u0153 -1 {} {websec.github.io - reverse solidus} + utf-8 C19C replace \uFFFD\uFFFD -1 {} {websec.github.io - reverse solidus} + utf-8 C19C strict {} 0 {} {websec.github.io - reverse solidus} + + utf-8 C2 tcl8 \u00C2 -1 {} {Missing trail byte} + utf-8 C2 replace \uFFFD -1 {} {Missing trail byte} + utf-8 C2 strict {} 0 {} {Missing trail byte} + utf-8 C27F tcl8 \u00C2\x7F -1 {} {Trail byte must be 80:BF} + utf-8 C27F replace \uFFFD\x7F -1 {} {Trail byte must be 80:BF} + utf-8 C27F strict {} 0 {} {Trail byte must be 80:BF} + utf-8 DF tcl8 \u00DF -1 {} {Missing trail byte} + utf-8 DF replace \uFFFD -1 {} {Missing trail byte} + utf-8 DF strict {} 0 {} {Missing trail byte} + utf-8 DF7F tcl8 \u00DF\x7F -1 {} {Trail byte must be 80:BF} + utf-8 DF7F replace \uFFFD\x7F -1 {} {Trail byte must be 80:BF} + utf-8 DF7F strict {} 0 {} {Trail byte must be 80:BF} + utf-8 DFE0A080 tcl8 \u00DF\u0800 -1 {} {Invalid trail byte is start of valid sequence} + utf-8 DFE0A080 replace \uFFFD\u0800 -1 {} {Invalid trail byte is start of valid sequence} + utf-8 DFE0A080 strict {} 0 {} {Invalid trail byte is start of valid sequence} + + utf-8 E0 tcl8 \u00E0 -1 {} {Missing trail byte} + utf-8 E0 replace \uFFFD -1 {} {Missing trail byte} + utf-8 E0 strict {} 0 {} {Missing trail byte} + utf-8 E080 tcl8 \u00E0\u20AC -1 {} {First trail byte must be A0:BF} + utf-8 E080 replace \uFFFD\uFFFD -1 {} {First trail byte must be A0:BF} + utf-8 E080 strict {} 0 {} {First trail byte must be A0:BF} + utf-8 E0819C tcl8 \u00E0\u0081\u0153 -1 {} {websec.github.io - reverse solidus} + utf-8 E0819C replace \uFFFD\uFFFD\uFFFD -1 {} {websec.github.io - reverse solidus} + utf-8 E0819C strict {} 0 {} {websec.github.io - reverse solidus} + utf-8 E09F tcl8 \u00E0\u0178 -1 {} {First trail byte must be A0:BF} + utf-8 E09F replace \uFFFD\uFFFD -1 {} {First trail byte must be A0:BF} + utf-8 E09F strict {} 0 {} {First trail byte must be A0:BF} + utf-8 E0A0 tcl8 \u00E0\u00A0 -1 {} {Missing second trail byte} + utf-8 E0A0 replace \uFFFD -1 {knownW3C} {Missing second trail byte} + utf-8 E0A0 strict {} 0 {} {Missing second trail byte} + utf-8 E0BF tcl8 \u00E0\u00BF -1 {} {Missing second trail byte} + utf-8 E0BF replace \uFFFD -1 {knownW3C} {Missing second trail byte} + utf-8 E0BF strict {} 0 {} {Missing second trail byte} + utf-8 E0A07F tcl8 \u00E0\u00A0\x7F -1 {} {Second trail byte must be 80:BF} + utf-8 E0A07F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} + utf-8 E0A07F strict {} 0 {} {Second trail byte must be 80:BF} + utf-8 E0BF7F tcl8 \u00E0\u00BF\x7F -1 {} {Second trail byte must be 80:BF} + utf-8 E0BF7F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} + utf-8 E0BF7F strict {} 0 {} {Second trail byte must be 80:BF} + + utf-8 E1 tcl8 \u00E1 -1 {} {Missing trail byte} + utf-8 E1 replace \uFFFD -1 {} {Missing trail byte} + utf-8 E1 strict {} 0 {} {Missing trail byte} + utf-8 E17F tcl8 \u00E1\x7F -1 {} {Trail byte must be 80:BF} + utf-8 E17F replace \uFFFD\x7F -1 {} {Trail byte must be 80:BF} + utf-8 E17F strict {} 0 {} {Trail byte must be 80:BF} + utf-8 E181 tcl8 \u00E1\u0081 -1 {} {Missing second trail byte} + utf-8 E181 replace \uFFFD -1 {knownW3C} {Missing second trail byte} + utf-8 E181 strict {} 0 {} {Missing second trail byte} + utf-8 E1BF tcl8 \u00E1\u00BF -1 {} {Missing second trail byte} + utf-8 E1BF replace \uFFFD -1 {knownW3C} {Missing second trail byte} + utf-8 E1BF strict {} 0 {} {Missing second trail byte} + utf-8 E1807F tcl8 \u00E1\u20AC\x7F -1 {} {Second trail byte must be 80:BF} + utf-8 E1807F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} + utf-8 E1807F strict {} 0 {} {Second trail byte must be 80:BF} + utf-8 E1BF7F tcl8 \u00E1\u00BF\x7F -1 {} {Second trail byte must be 80:BF} + utf-8 E1BF7F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} + utf-8 E1BF7F strict {} 0 {} {Second trail byte must be 80:BF} + utf-8 EC tcl8 \u00EC -1 {} {Missing trail byte} + utf-8 EC replace \uFFFD -1 {} {Missing trail byte} + utf-8 EC strict {} 0 {} {Missing trail byte} + utf-8 EC7F tcl8 \u00EC\x7F -1 {} {Trail byte must be 80:BF} + utf-8 EC7F replace \uFFFD\x7F -1 {} {Trail byte must be 80:BF} + utf-8 EC7F strict {} 0 {} {Trail byte must be 80:BF} + utf-8 EC81 tcl8 \u00EC\u0081 -1 {} {Missing second trail byte} + utf-8 EC81 replace \uFFFD -1 {knownW3C} {Missing second trail byte} + utf-8 EC81 strict {} 0 {} {Missing second trail byte} + utf-8 ECBF tcl8 \u00EC\u00BF -1 {} {Missing second trail byte} + utf-8 ECBF replace \uFFFD -1 {knownW3C} {Missing second trail byte} + utf-8 ECBF strict {} 0 {} {Missing second trail byte} + utf-8 EC807F tcl8 \u00EC\u20AC\x7F -1 {} {Second trail byte must be 80:BF} + utf-8 EC807F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} + utf-8 EC807F strict {} 0 {} {Second trail byte must be 80:BF} + utf-8 ECBF7F tcl8 \u00EC\u00BF\x7F -1 {} {Second trail byte must be 80:BF} + utf-8 ECBF7F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} + utf-8 ECBF7F strict {} 0 {} {Second trail byte must be 80:BF} + + utf-8 ED tcl8 \u00ED -1 {} {Missing trail byte} + utf-8 ED replace \uFFFD -1 {} {Missing trail byte} + utf-8 ED strict {} 0 {} {Missing trail byte} + utf-8 ED7F tcl8 \u00ED\u7F -1 {} {First trail byte must be 80:9F} + utf-8 ED7F replace \uFFFD\u7F -1 {} {First trail byte must be 80:9F} + utf-8 ED7F strict {} 0 {} {First trail byte must be 80:9F} + utf-8 EDA0 tcl8 \u00ED\u00A0 -1 {} {First trail byte must be 80:9F} + utf-8 EDA0 replace \uFFFD\uFFFD -1 {} {First trail byte must be 80:9F} + utf-8 EDA0 strict {} 0 {} {First trail byte must be 80:9F} + utf-8 ED81 tcl8 \u00ED\u0081 -1 {} {Missing second trail byte} + utf-8 ED81 replace \uFFFD -1 {knownW3C} {Missing second trail byte} + utf-8 ED81 strict {} 0 {} {Missing second trail byte} + utf-8 EDBF tcl8 \u00ED\u00BF -1 {} {Missing second trail byte} + utf-8 EDBF replace \uFFFD -1 {knownW3C} {Missing second trail byte} + utf-8 EDBF strict {} 0 {} {Missing second trail byte} + utf-8 ED807F tcl8 \u00ED\u20AC\x7F -1 {} {Second trail byte must be 80:BF} + utf-8 ED807F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} + utf-8 ED807F strict {} 0 {} {Second trail byte must be 80:BF} + utf-8 ED9F7F tcl8 \u00ED\u0178\x7F -1 {} {Second trail byte must be 80:BF} + utf-8 ED9F7F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} + utf-8 ED9F7F strict {} 0 {} {Second trail byte must be 80:BF} + utf-8 EDA080 tcl8 \uD800 -1 {} {High surrogate} + utf-8 EDA080 replace \uFFFD -1 {knownBug} {High surrogate} + utf-8 EDA080 strict {} 0 {} {High surrogate} + utf-8 EDAFBF tcl8 \uDBFF -1 {} {High surrogate} + utf-8 EDAFBF replace \uFFFD -1 {knownBug} {High surrogate} + utf-8 EDAFBF strict {} 0 {} {High surrogate} + utf-8 EDB080 tcl8 \uDC00 -1 {} {Low surrogate} + utf-8 EDB080 replace \uFFFD -1 {knownBug} {Low surrogate} + utf-8 EDB080 strict {} 0 {} {Low surrogate} + utf-8 EDBFBF tcl8 \uDFFF -1 {knownBug} {Low surrogate} + utf-8 EDBFBF replace \uFFFD -1 {knownBug} {Low surrogate} + utf-8 EDBFBF strict {} 0 {} {Low surrogate} + utf-8 EDA080EDB080 tcl8 \U00010000 -1 {knownBug} {High low surrogate pair} + utf-8 EDA080EDB080 replace \uFFFD\uFFFD -1 {knownBug} {High low surrogate pair} + utf-8 EDA080EDB080 strict {} 0 {} {High low surrogate pair} + utf-8 EDAFBFEDBFBF tcl8 \U0010FFFF -1 {knownBug} {High low surrogate pair} + utf-8 EDAFBFEDBFBF replace \uFFFD\uFFFD -1 {knownBug} {High low surrogate pair} + utf-8 EDAFBFEDBFBF strict {} 0 {} {High low surrogate pair} + + utf-8 EE tcl8 \u00EE -1 {} {Missing trail byte} + utf-8 EE replace \uFFFD -1 {} {Missing trail byte} + utf-8 EE strict {} 0 {} {Missing trail byte} + utf-8 EE7F tcl8 \u00EE\u7F -1 {} {First trail byte must be 80:BF} + utf-8 EE7F replace \uFFFD\u7F -1 {} {First trail byte must be 80:BF} + utf-8 EE7F strict {} 0 {} {First trail byte must be 80:BF} + utf-8 EED0 tcl8 \u00EE\u00D0 -1 {} {First trail byte must be 80:BF} + utf-8 EED0 replace \uFFFD\uFFFD -1 {} {First trail byte must be 80:BF} + utf-8 EED0 strict {} 0 {} {First trail byte must be 80:BF} + utf-8 EE81 tcl8 \u00EE\u0081 -1 {} {Missing second trail byte} + utf-8 EE81 replace \uFFFD -1 {knownW3C} {Missing second trail byte} + utf-8 EE81 strict {} 0 {} {Missing second trail byte} + utf-8 EEBF tcl8 \u00EE\u00BF -1 {} {Missing second trail byte} + utf-8 EEBF replace \uFFFD -1 {knownW3C} {Missing second trail byte} + utf-8 EEBF strict {} 0 {} {Missing second trail byte} + utf-8 EE807F tcl8 \u00EE\u20AC\x7F -1 {} {Second trail byte must be 80:BF} + utf-8 EE807F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} + utf-8 EE807F strict {} 0 {} {Second trail byte must be 80:BF} + utf-8 EEBF7F tcl8 \u00EE\u00BF\x7F -1 {} {Second trail byte must be 80:BF} + utf-8 EEBF7F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} + utf-8 EEBF7F strict {} 0 {} {Second trail byte must be 80:BF} + utf-8 EF tcl8 \u00EF -1 {} {Missing trail byte} + utf-8 EF replace \uFFFD -1 {} {Missing trail byte} + utf-8 EF strict {} 0 {} {Missing trail byte} + utf-8 EF7F tcl8 \u00EF\u7F -1 {} {First trail byte must be 80:BF} + utf-8 EF7F replace \uFFFD\u7F -1 {} {First trail byte must be 80:BF} + utf-8 EF7F strict {} 0 {} {First trail byte must be 80:BF} + utf-8 EFD0 tcl8 \u00EF\u00D0 -1 {} {First trail byte must be 80:BF} + utf-8 EFD0 replace \uFFFD\uFFFD -1 {} {First trail byte must be 80:BF} + utf-8 EFD0 strict {} 0 {} {First trail byte must be 80:BF} + utf-8 EF81 tcl8 \u00EF\u0081 -1 {} {Missing second trail byte} + utf-8 EF81 replace \uFFFD -1 {knownW3C} {Missing second trail byte} + utf-8 EF81 strict {} 0 {} {Missing second trail byte} + utf-8 EFBF tcl8 \u00EF\u00BF -1 {} {Missing second trail byte} + utf-8 EFBF replace \uFFFD -1 {knownW3C} {Missing second trail byte} + utf-8 EFBF strict {} 0 {} {Missing second trail byte} + utf-8 EF807F tcl8 \u00EF\u20AC\x7F -1 {} {Second trail byte must be 80:BF} + utf-8 EF807F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} + utf-8 EF807F strict {} 0 {} {Second trail byte must be 80:BF} + utf-8 EFBF7F tcl8 \u00EF\u00BF\x7F -1 {} {Second trail byte must be 80:BF} + utf-8 EFBF7F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} + utf-8 EFBF7F strict {} 0 {} {Second trail byte must be 80:BF} + + utf-8 F0 tcl8 \u00F0 -1 {} {Missing trail byte} + utf-8 F0 replace \uFFFD -1 {} {Missing trail byte} + utf-8 F0 strict {} 0 {} {Missing trail byte} + utf-8 F080 tcl8 \u00F0\u20AC -1 {} {First trail byte must be 90:BF} + utf-8 F080 replace \uFFFD -1 {knownW3C} {First trail byte must be 90:BF} + utf-8 F080 strict {} 0 {} {First trail byte must be 90:BF} + utf-8 F08F tcl8 \u00F0\u8F -1 {} {First trail byte must be 90:BF} + utf-8 F08F replace \uFFFD -1 {knownW3C} {First trail byte must be 90:BF} + utf-8 F08F strict {} 0 {} {First trail byte must be 90:BF} + utf-8 F0D0 tcl8 \u00F0\u00D0 -1 {} {First trail byte must be 90:BF} + utf-8 F0D0 replace \uFFFD\uFFFD -1 {} {First trail byte must be 90:BF} + utf-8 F0D0 strict {} 0 {} {First trail byte must be 90:BF} + utf-8 F090 tcl8 \u00F0\u0090 -1 {} {Missing second trail byte} + utf-8 F090 replace \uFFFD -1 {knownW3C} {Missing second trail byte} + utf-8 F090 strict {} 0 {} {Missing second trail byte} + utf-8 F0BF tcl8 \u00F0\u00BF -1 {} {Missing second trail byte} + utf-8 F0BF replace \uFFFD -1 {knownW3C} {Missing second trail byte} + utf-8 F0BF strict {} 0 {} {Missing second trail byte} + utf-8 F0907F tcl8 \u00F0\u0090\x7F -1 {} {Second trail byte must be 80:BF} + utf-8 F0907F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} + utf-8 F0907F strict {} 0 {} {Second trail byte must be 80:BF} + utf-8 F0BF7F tcl8 \u00F0\u00BF\x7F -1 {} {Second trail byte must be 80:BF} + utf-8 F0BF7F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} + utf-8 F0BF7F strict {} 0 {} {Second trail byte must be 80:BF} + utf-8 F090BF tcl8 \u00F0\u0090\u00BF -1 {} {Missing third trail byte} + utf-8 F090BF replace \uFFFD -1 {knownW3C} {Missing third trail byte} + utf-8 F090BF strict {} 0 {} {Missing third trail byte} + utf-8 F0BF81 tcl8 \u00F0\u00BF\u0081 -1 {} {Missing third trail byte} + utf-8 F0BF81 replace \uFFFD -1 {knownW3C} {Missing third trail byte} + utf-8 F0BF81 strict {} 0 {} {Missing third trail byte} + utf-8 F0BF807F tcl8 \u00F0\u00BF\u20AC\x7F -1 {} {Third trail byte must be 80:BF} + utf-8 F0BF817F replace \uFFFD\x7F -1 {knownW3C} {Third trail byte must be 80:BF} + utf-8 F0BF817F strict {} 0 {} {Third trail byte must be 80:BF} + utf-8 F090BFD0 tcl8 \u00F0\u0090\u00BF\u00D0 -1 {} {Third trail byte must be 80:BF} + utf-8 F090BFD0 replace \uFFFD -1 {knownW3C} {Third trail byte must be 80:BF} + utf-8 F090BFD0 strict {} 0 {} {Third trail byte must be 80:BF} + + utf-8 F1 tcl8 \u00F1 -1 {} {Missing trail byte} + utf-8 F1 replace \uFFFD -1 {} {Missing trail byte} + utf-8 F1 strict {} 0 {} {Missing trail byte} + utf-8 F17F tcl8 \u00F1\u7F -1 {} {First trail byte must be 80:BF} + utf-8 F17F replace \uFFFD -1 {knownW3C} {First trail byte must be 80:BF} + utf-8 F17F strict {} 0 {} {First trail byte must be 80:BF} + utf-8 F1D0 tcl8 \u00F1\u00D0 -1 {} {First trail byte must be 80:BF} + utf-8 F1D0 replace \uFFFD\uFFFD -1 {} {First trail byte must be 80:BF} + utf-8 F1D0 strict {} 0 {} {First trail byte must be 80:BF} + utf-8 F180 tcl8 \u00F1\u20AC -1 {} {Missing second trail byte} + utf-8 F180 replace \uFFFD -1 {knownW3C} {Missing second trail byte} + utf-8 F180 strict {} 0 {} {Missing second trail byte} + utf-8 F1BF tcl8 \u00F1\u00BF -1 {} {Missing second trail byte} + utf-8 F1BF replace \uFFFD -1 {knownW3C} {Missing second trail byte} + utf-8 F1BF strict {} 0 {} {Missing second trail byte} + utf-8 F1807F tcl8 \u00F1\u20AC\x7F -1 {} {Second trail byte must be 80:BF} + utf-8 F1807F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} + utf-8 F1807F strict {} 0 {} {Second trail byte must be 80:BF} + utf-8 F1BF7F tcl8 \u00F1\u00BF\x7F -1 {} {Second trail byte must be 80:BF} + utf-8 F1BF7F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} + utf-8 F1BF7F strict {} 0 {} {Second trail byte must be 80:BF} + utf-8 F180BF tcl8 \u00F1\u20AC\u00BF -1 {} {Missing third trail byte} + utf-8 F180BF replace \uFFFD -1 {knownW3C} {Missing third trail byte} + utf-8 F180BF strict {} 0 {} {Missing third trail byte} + utf-8 F1BF81 tcl8 \u00F1\u00BF\u0081 -1 {} {Missing third trail byte} + utf-8 F1BF81 replace \uFFFD -1 {knownW3C} {Missing third trail byte} + utf-8 F1BF81 strict {} 0 {} {Missing third trail byte} + utf-8 F1BF807F tcl8 \u00F1\u00BF\u20AC\x7F -1 {} {Third trail byte must be 80:BF} + utf-8 F1BF817F replace \uFFFD\x7F -1 {knownW3C} {Third trail byte must be 80:BF} + utf-8 F1BF817F strict {} 0 {} {Third trail byte must be 80:BF} + utf-8 F180BFD0 tcl8 \u00F1\u20AC\u00BF\u00D0 -1 {} {Third trail byte must be 80:BF} + utf-8 F180BFD0 replace \uFFFD -1 {knownW3C} {Third trail byte must be 80:BF} + utf-8 F180BFD0 strict {} 0 {} {Third trail byte must be 80:BF} + utf-8 F3 tcl8 \u00F3 -1 {} {Missing trail byte} + utf-8 F3 replace \uFFFD -1 {} {Missing trail byte} + utf-8 F3 strict {} 0 {} {Missing trail byte} + utf-8 F37F tcl8 \u00F3\x7F -1 {} {First trail byte must be 80:BF} + utf-8 F37F replace \uFFFD -1 {knownW3C} {First trail byte must be 80:BF} + utf-8 F37F strict {} 0 {} {First trail byte must be 80:BF} + utf-8 F3D0 tcl8 \u00F3\u00D0 -1 {} {First trail byte must be 80:BF} + utf-8 F3D0 replace \uFFFD\uFFFD -1 {} {First trail byte must be 80:BF} + utf-8 F3D0 strict {} 0 {} {First trail byte must be 80:BF} + utf-8 F380 tcl8 \u00F3\u20AC -1 {} {Missing second trail byte} + utf-8 F380 replace \uFFFD -1 {knownW3C} {Missing second trail byte} + utf-8 F380 strict {} 0 {} {Missing second trail byte} + utf-8 F3BF tcl8 \u00F3\u00BF -1 {} {Missing second trail byte} + utf-8 F3BF replace \uFFFD -1 {knownW3C} {Missing second trail byte} + utf-8 F3BF strict {} 0 {} {Missing second trail byte} + utf-8 F3807F tcl8 \u00F3\u20AC\x7F -1 {} {Second trail byte must be 80:BF} + utf-8 F3807F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} + utf-8 F3807F strict {} 0 {} {Second trail byte must be 80:BF} + utf-8 F3BF7F tcl8 \u00F3\u00BF\x7F -1 {} {Second trail byte must be 80:BF} + utf-8 F3BF7F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} + utf-8 F3BF7F strict {} 0 {} {Second trail byte must be 80:BF} + utf-8 F380BF tcl8 \u00F3\u20AC\u00BF -1 {} {Missing third trail byte} + utf-8 F380BF replace \uFFFD -1 {knownW3C} {Missing third trail byte} + utf-8 F380BF strict {} 0 {} {Missing third trail byte} + utf-8 F3BF81 tcl8 \u00F3\u00BF\u0081 -1 {} {Missing third trail byte} + utf-8 F3BF81 replace \uFFFD -1 {knownW3C} {Missing third trail byte} + utf-8 F3BF81 strict {} 0 {} {Missing third trail byte} + utf-8 F3BF807F tcl8 \u00F3\u00BF\u20AC\x7F -1 {} {Third trail byte must be 80:BF} + utf-8 F3BF817F replace \uFFFD\x7F -1 {knownW3C} {Third trail byte must be 80:BF} + utf-8 F3BF817F strict {} 0 {} {Third trail byte must be 80:BF} + utf-8 F380BFD0 tcl8 \u00F3\u20AC\u00BF\u00D0 -1 {} {Third trail byte must be 80:BF} + utf-8 F380BFD0 replace \uFFFD -1 {knownW3C} {Third trail byte must be 80:BF} + utf-8 F380BFD0 strict {} 0 {} {Third trail byte must be 80:BF} + + utf-8 F4 tcl8 \u00F4 -1 {} {Missing trail byte} + utf-8 F4 replace \uFFFD -1 {} {Missing trail byte} + utf-8 F4 strict {} 0 {} {Missing trail byte} + utf-8 F47F tcl8 \u00F4\u7F -1 {} {First trail byte must be 80:8F} + utf-8 F47F replace \uFFFD\u7F -1 {knownW3C} {First trail byte must be 80:8F} + utf-8 F47F strict {} 0 {} {First trail byte must be 80:8F} + utf-8 F490 tcl8 \u00F4\u0090 -1 {} {First trail byte must be 80:8F} + utf-8 F490 replace \uFFFD\uFFFD -1 {} {First trail byte must be 80:8F} + utf-8 F490 strict {} 0 {} {First trail byte must be 80:8F} + utf-8 F480 tcl8 \u00F4\u20AC -1 {} {Missing second trail byte} + utf-8 F480 replace \uFFFD -1 {knownW3C} {Missing second trail byte} + utf-8 F480 strict {} 0 {} {Missing second trail byte} + utf-8 F48F tcl8 \u00F4\u008F -1 {} {Missing second trail byte} + utf-8 F48F replace \uFFFD -1 {knownW3C} {Missing second trail byte} + utf-8 F48F strict {} 0 {} {Missing second trail byte} + utf-8 F4807F tcl8 \u00F4\u20AC\x7F -1 {} {Second trail byte must be 80:BF} + utf-8 F4807F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} + utf-8 F4807F strict {} 0 {} {Second trail byte must be 80:BF} + utf-8 F48F7F tcl8 \u00F4\u008F\x7F -1 {} {Second trail byte must be 80:BF} + utf-8 F48F7F replace \uFFFD\u7F -1 {knownW3C} {Second trail byte must be 80:BF} + utf-8 F48F7F strict {} 0 {} {Second trail byte must be 80:BF} + utf-8 F48081 tcl8 \u00F4\u20AC\u0081 -1 {} {Missing third trail byte} + utf-8 F48081 replace \uFFFD -1 {knownW3C} {Missing third trail byte} + utf-8 F48081 strict {} 0 {} {Missing third trail byte} + utf-8 F48F81 tcl8 \u00F4\u008F\u0081 -1 {} {Missing third trail byte} + utf-8 F48F81 replace \uFFFD -1 {knownW3C} {Missing third trail byte} + utf-8 F48F81 strict {} 0 {} {Missing third trail byte} + utf-8 F481817F tcl8 \u00F4\u0081\u0081\x7F -1 {} {Third trail byte must be 80:BF} + utf-8 F480817F replace \uFFFD\x7F -1 {knownW3C} {Third trail byte must be 80:BF} + utf-8 F480817F strict {} 0 {} {Third trail byte must be 80:BF} + utf-8 F48FBFD0 tcl8 \u00F4\u008F\u00BF\u00D0 -1 {} {Third trail byte must be 80:BF} + utf-8 F48FBFD0 replace \uFFFD -1 {knownW3C} {Third trail byte must be 80:BF} + utf-8 F48FBFD0 strict {} 0 {} {Third trail byte must be 80:BF} + + utf-8 F5 tcl8 \u00F5 -1 {} {F5:FF are invalid everywhere} + utf-8 F5 replace \uFFFD -1 {} {F5:FF are invalid everywhere} + utf-8 F5 strict {} 0 {} {F5:FF are invalid everywhere} + utf-8 FF tcl8 \u00FF -1 {} {F5:FF are invalid everywhere} + utf-8 FF replace \uFFFD -1 {} {F5:FF are invalid everywhere} + utf-8 FF strict {} 0 {} {F5:FF are invalid everywhere} + + utf-8 C0AFE080BFF0818130 replace \uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\x30 -1 {} {Unicode Table 3-8} + utf-8 EDA080EDBFBFEDAF30 replace \uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\x30 -1 {knownW3C} {Unicode Table 3-9} + utf-8 F4919293FF4180BF30 replace \uFFFD\uFFFD\uFFFD\uFFFD\uFFFD\u0041\uFFFD\uFFFD\x30 -1 {} {Unicode Table 3-10} + utf-8 E180E2F09192F1BF30 replace \uFFFD\uFFFD\uFFFD\uFFFD\x30 -1 {knownW3C} {Unicode Table 3.11} +} + +# utf16-le and utf16-be test cases. Note utf16 cases are automatically generated +# based on these depending on platform endianness. Note truncated tests can only +# happen when the sequence is at the end (including by itself) Thus {solo tail} +# in some cases. +lappend encInvalidBytes {*}{ + utf-16le 41 tcl8 \uFFFD -1 {solo tail} {Truncated} + utf-16le 41 replace \uFFFD -1 {solo tail} {Truncated} + utf-16le 41 strict {} 0 {solo tail} {Truncated} + utf-16le 00D8 tcl8 \uD800 -1 {} {Missing low surrogate} + utf-16le 00D8 replace \uFFFD -1 {knownBug} {Missing low surrogate} + utf-16le 00D8 strict {} 0 {knownBug} {Missing low surrogate} + utf-16le 00DC tcl8 \uDC00 -1 {} {Missing high surrogate} + utf-16le 00DC replace \uFFFD -1 {knownBug} {Missing high surrogate} + utf-16le 00DC strict {} 0 {knownBug} {Missing high surrogate} + + utf-16be 41 tcl8 \uFFFD -1 {solo tail} {Truncated} + utf-16be 41 replace \uFFFD -1 {solo tail} {Truncated} + utf-16be 41 strict {} 0 {solo tail} {Truncated} + utf-16be D800 tcl8 \uD800 -1 {} {Missing low surrogate} + utf-16be D800 replace \uFFFD -1 {knownBug} {Missing low surrogate} + utf-16be D800 strict {} 0 {knownBug} {Missing low surrogate} + utf-16be DC00 tcl8 \uDC00 -1 {} {Missing high surrogate} + utf-16be DC00 replace \uFFFD -1 {knownBug} {Missing high surrogate} + utf-16be DC00 strict {} 0 {knownBug} {Missing high surrogate} +} + +# utf32-le and utf32-be test cases. Note utf32 cases are automatically generated +# based on these depending on platform endianness. Note truncated tests can only +# happen when the sequence is at the end (including by itself) Thus {solo tail} +# in some cases. +lappend encInvalidBytes {*}{ + utf-32le 41 tcl8 \uFFFD -1 {solo tail} {Truncated} + utf-32le 41 replace \uFFFD -1 {solo} {Truncated} + utf-32le 41 strict {} 0 {solo tail} {Truncated} + utf-32le 4100 tcl8 \uFFFD -1 {solo tail} {Truncated} + utf-32le 4100 replace \uFFFD -1 {solo} {Truncated} + utf-32le 4100 strict {} 0 {solo tail} {Truncated} + utf-32le 410000 tcl8 \uFFFD -1 {solo tail} {Truncated} + utf-32le 410000 replace \uFFFD -1 {solo} {Truncated} + utf-32le 410000 strict {} 0 {solo tail} {Truncated} + utf-32le 00D80000 tcl8 \uD800 -1 {} {High-surrogate} + utf-32le 00D80000 replace \uFFFD -1 {} {High-surrogate} + utf-32le 00D80000 strict {} 0 {} {High-surrogate} + utf-32le 00DC0000 tcl8 \uDC00 -1 {} {Low-surrogate} + utf-32le 00DC0000 replace \uFFFD -1 {} {Low-surrogate} + utf-32le 00DC0000 strict {} 0 {} {Low-surrogate} + utf-32le 00D8000000DC0000 tcl8 \uD800\uDC00 -1 {} {High-low-surrogate-pair} + utf-32le 00D8000000DC0000 replace \uFFFD\uFFFD -1 {} {High-low-surrogate-pair} + utf-32le 00D8000000DC0000 strict {} 0 {} {High-low-surrogate-pair} + utf-32le 00001100 tcl8 \UFFFD -1 {} {Out of range} + utf-32le 00001100 replace \UFFFD -1 {} {Out of range} + utf-32le 00001100 strict {} 0 {} {Out of range} + utf-32le FFFFFFFF tcl8 \UFFFD -1 {} {Out of range} + utf-32le FFFFFFFF replace \UFFFD -1 {} {Out of range} + utf-32le FFFFFFFF strict {} 0 {} {Out of range} + + utf-32be 41 tcl8 \uFFFD -1 {solo tail} {Truncated} + utf-32be 41 replace \uFFFD -1 {solo tail} {Truncated} + utf-32be 41 strict {} 0 {solo tail} {Truncated} + utf-32be 0041 tcl8 \uFFFD -1 {solo tail} {Truncated} + utf-32be 0041 replace \uFFFD -1 {solo} {Truncated} + utf-32be 0041 strict {} 0 {solo tail} {Truncated} + utf-32be 000041 tcl8 \uFFFD -1 {solo tail} {Truncated} + utf-32be 000041 replace \uFFFD -1 {solo} {Truncated} + utf-32be 000041 strict {} 0 {solo tail} {Truncated} + utf-32be 0000D800 tcl8 \uD800 -1 {} {High-surrogate} + utf-32be 0000D800 replace \uFFFD -1 {} {High-surrogate} + utf-32be 0000D800 strict {} 0 {} {High-surrogate} + utf-32be 0000DC00 tcl8 \uDC00 -1 {} {Low-surrogate} + utf-32be 0000DC00 replace \uFFFD -1 {} {Low-surrogate} + utf-32be 0000DC00 strict {} 0 {} {Low-surrogate} + utf-32be 0000D8000000DC00 tcl8 \uD800\uDC00 -1 {} {High-low-surrogate-pair} + utf-32be 0000D8000000DC00 replace \uFFFD\uFFFD -1 {} {High-low-surrogate-pair} + utf-32be 0000D8000000DC00 strict {} 0 {} {High-low-surrogate-pair} + utf-32be 00110000 tcl8 \UFFFD -1 {} {Out of range} + utf-32be 00110000 replace \UFFFD -1 {} {Out of range} + utf-32be 00110000 strict {} 0 {} {Out of range} + utf-32be FFFFFFFF tcl8 \UFFFD -1 {} {Out of range} + utf-32be FFFFFFFF replace \UFFFD -1 {} {Out of range} + utf-32be FFFFFFFF strict {} 0 {} {Out of range} +} + +# Strings that cannot be encoded for specific encoding / profiles +# +# should be unique for test ids to be unique. +# See earlier comments about CTRL field. +# +# Note utf-16, utf-32 missing because they are automatically +# generated based on le/be versions. +# TODO - out of range code point (note cannot be generated by \U notation) +lappend encUnencodableStrings {*}{ + ascii \u00e0 tcl8 3f -1 {} {unencodable} + ascii \u00e0 strict {} 0 {} {unencodable} + + iso8859-1 \u0141 tcl8 3f -1 {} unencodable + iso8859-1 \u0141 strict {} 0 {} unencodable + + utf-8 \uD800 tcl8 eda080 -1 {} High-surrogate + utf-8 \uD800 strict {} 0 {} High-surrogate + utf-8 \uDC00 tcl8 edb080 -1 {} High-surrogate + utf-8 \uDC00 strict {} 0 {} High-surrogate +} + + +# The icuUcmTests.tcl is generated by the tools/ucm2tests.tcl script +# and generates test vectors for the above tables for various encodings +# based on ICU UCM files. +# TODO - commented out for now as generating a lot of mismatches. +# source [file join [file dirname [info script]] icuUcmTests.tcl] diff --git a/tests/icuUcmTests.tcl b/tests/icuUcmTests.tcl new file mode 100644 index 0000000..0c4071f --- /dev/null +++ b/tests/icuUcmTests.tcl @@ -0,0 +1,1891 @@ + +# This file is automatically generated by ucm2tests.tcl. +# Edits will be overwritten on next generation. +# +# Generates tests comparing Tcl encodings to ICU. +# The generated file is NOT standalone. It should be sourced into a test script. + +proc ucmConvertfromMismatches {enc map} { + set mismatches {} + foreach {unihex hex} $map { + set unihex [string range 00000000$unihex end-7 end]; # Make 8 digits + set unich [subst "\\U$unihex"] + if {[encoding convertfrom -profile strict $enc [binary decode hex $hex]] ne $unich} { + lappend mismatches "<[printable $unich],$hex>" + } + } + return $mismatches +} +proc ucmConverttoMismatches {enc map} { + set mismatches {} + foreach {unihex hex} $map { + set unihex [string range 00000000$unihex end-7 end]; # Make 8 digits + set unich [subst "\\U$unihex"] + if {[encoding convertto -profile strict $enc $unich] ne [binary decode hex $hex]} { + lappend mismatches "<[printable $unich],$hex>" + } + } + return $mismatches +} +if {[info commands printable] eq ""} { + proc printable {s} { + set print "" + foreach c [split $s ""] { + set i [scan $c %c] + if {[string is print $c] && ($i <= 127)} { + append print $c + } elseif {$i <= 0xff} { + append print \\x[format %02X $i] + } elseif {$i <= 0xffff} { + append print \\u[format %04X $i] + } else { + append print \\U[format %08X $i] + } + } + return $print + } +} + + +# +# cp1250 (generated from glibc-CP1250-2.1.2) + +test encoding-convertfrom-ucmCompare-cp1250 {Compare against ICU UCM} -body { + ucmConvertfromMismatches cp1250 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0024 24 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007E 7E 007F 7F 00A0 A0 00A4 A4 00A6 A6 00A7 A7 00A8 A8 00A9 A9 00AB AB 00AC AC 00AD AD 00AE AE 00B0 B0 00B1 B1 00B4 B4 00B5 B5 00B6 B6 00B7 B7 00B8 B8 00BB BB 00C1 C1 00C2 C2 00C4 C4 00C7 C7 00C9 C9 00CB CB 00CD CD 00CE CE 00D3 D3 00D4 D4 00D6 D6 00D7 D7 00DA DA 00DC DC 00DD DD 00DF DF 00E1 E1 00E2 E2 00E4 E4 00E7 E7 00E9 E9 00EB EB 00ED ED 00EE EE 00F3 F3 00F4 F4 00F6 F6 00F7 F7 00FA FA 00FC FC 00FD FD 0102 C3 0103 E3 0104 A5 0105 B9 0106 C6 0107 E6 010C C8 010D E8 010E CF 010F EF 0110 D0 0111 F0 0118 CA 0119 EA 011A CC 011B EC 0139 C5 013A E5 013D BC 013E BE 0141 A3 0142 B3 0143 D1 0144 F1 0147 D2 0148 F2 0150 D5 0151 F5 0154 C0 0155 E0 0158 D8 0159 F8 015A 8C 015B 9C 015E AA 015F BA 0160 8A 0161 9A 0162 DE 0163 FE 0164 8D 0165 9D 016E D9 016F F9 0170 DB 0171 FB 0179 8F 017A 9F 017B AF 017C BF 017D 8E 017E 9E 02C7 A1 02D8 A2 02D9 FF 02DB B2 02DD BD 2013 96 2014 97 2018 91 2019 92 201A 82 201C 93 201D 94 201E 84 2020 86 2021 87 2022 95 2026 85 2030 89 2039 8B 203A 9B 20AC 80 2122 99} +} -result {} + +test encoding-convertto-ucmCompare-cp1250 {Compare against ICU UCM} -body { + ucmConverttoMismatches cp1250 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0024 24 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007E 7E 007F 7F 00A0 A0 00A4 A4 00A6 A6 00A7 A7 00A8 A8 00A9 A9 00AB AB 00AC AC 00AD AD 00AE AE 00B0 B0 00B1 B1 00B4 B4 00B5 B5 00B6 B6 00B7 B7 00B8 B8 00BB BB 00C1 C1 00C2 C2 00C4 C4 00C7 C7 00C9 C9 00CB CB 00CD CD 00CE CE 00D3 D3 00D4 D4 00D6 D6 00D7 D7 00DA DA 00DC DC 00DD DD 00DF DF 00E1 E1 00E2 E2 00E4 E4 00E7 E7 00E9 E9 00EB EB 00ED ED 00EE EE 00F3 F3 00F4 F4 00F6 F6 00F7 F7 00FA FA 00FC FC 00FD FD 0102 C3 0103 E3 0104 A5 0105 B9 0106 C6 0107 E6 010C C8 010D E8 010E CF 010F EF 0110 D0 0111 F0 0118 CA 0119 EA 011A CC 011B EC 0139 C5 013A E5 013D BC 013E BE 0141 A3 0142 B3 0143 D1 0144 F1 0147 D2 0148 F2 0150 D5 0151 F5 0154 C0 0155 E0 0158 D8 0159 F8 015A 8C 015B 9C 015E AA 015F BA 0160 8A 0161 9A 0162 DE 0163 FE 0164 8D 0165 9D 016E D9 016F F9 0170 DB 0171 FB 0179 8F 017A 9F 017B AF 017C BF 017D 8E 017E 9E 02C7 A1 02D8 A2 02D9 FF 02DB B2 02DD BD 2013 96 2014 97 2018 91 2019 92 201A 82 201C 93 201D 94 201E 84 2020 86 2021 87 2022 95 2026 85 2030 89 2039 8B 203A 9B 20AC 80 2122 99} +} -result {} + +# cp1250 - invalid byte sequences +lappend encInvalidBytes {*}{ + cp1250 81 tcl8 \U00000081 -1 {} {} + cp1250 81 replace \uFFFD -1 {} {} + cp1250 81 strict {} 0 {} {} + cp1250 83 tcl8 \U00000083 -1 {} {} + cp1250 83 replace \uFFFD -1 {} {} + cp1250 83 strict {} 0 {} {} + cp1250 88 tcl8 \U00000088 -1 {} {} + cp1250 88 replace \uFFFD -1 {} {} + cp1250 88 strict {} 0 {} {} + cp1250 90 tcl8 \U00000090 -1 {} {} + cp1250 90 replace \uFFFD -1 {} {} + cp1250 90 strict {} 0 {} {} + cp1250 98 tcl8 \U00000098 -1 {} {} + cp1250 98 replace \uFFFD -1 {} {} + cp1250 98 strict {} 0 {} {} +}; # cp1250 + +# cp1250 - invalid byte sequences +lappend encUnencodableStrings {*}{ + cp1250 \U00000080 tcl8 1A -1 {} {} + cp1250 \U00000080 replace 1A -1 {} {} + cp1250 \U00000080 strict {} 0 {} {} + cp1250 \U00000400 tcl8 1A -1 {} {} + cp1250 \U00000400 replace 1A -1 {} {} + cp1250 \U00000400 strict {} 0 {} {} + cp1250 \U0000D800 tcl8 1A -1 {} {} + cp1250 \U0000D800 replace 1A -1 {} {} + cp1250 \U0000D800 strict {} 0 {} {} + cp1250 \U0000DC00 tcl8 1A -1 {} {} + cp1250 \U0000DC00 replace 1A -1 {} {} + cp1250 \U0000DC00 strict {} 0 {} {} + cp1250 \U00010000 tcl8 1A -1 {} {} + cp1250 \U00010000 replace 1A -1 {} {} + cp1250 \U00010000 strict {} 0 {} {} + cp1250 \U0010FFFF tcl8 1A -1 {} {} + cp1250 \U0010FFFF replace 1A -1 {} {} + cp1250 \U0010FFFF strict {} 0 {} {} +}; # cp1250 + +# +# cp1251 (generated from glibc-CP1251-2.1.2) + +test encoding-convertfrom-ucmCompare-cp1251 {Compare against ICU UCM} -body { + ucmConvertfromMismatches cp1251 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0024 24 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007E 7E 007F 7F 00A0 A0 00A4 A4 00A6 A6 00A7 A7 00A9 A9 00AB AB 00AC AC 00AD AD 00AE AE 00B0 B0 00B1 B1 00B5 B5 00B6 B6 00B7 B7 00BB BB 0401 A8 0402 80 0403 81 0404 AA 0405 BD 0406 B2 0407 AF 0408 A3 0409 8A 040A 8C 040B 8E 040C 8D 040E A1 040F 8F 0410 C0 0411 C1 0412 C2 0413 C3 0414 C4 0415 C5 0416 C6 0417 C7 0418 C8 0419 C9 041A CA 041B CB 041C CC 041D CD 041E CE 041F CF 0420 D0 0421 D1 0422 D2 0423 D3 0424 D4 0425 D5 0426 D6 0427 D7 0428 D8 0429 D9 042A DA 042B DB 042C DC 042D DD 042E DE 042F DF 0430 E0 0431 E1 0432 E2 0433 E3 0434 E4 0435 E5 0436 E6 0437 E7 0438 E8 0439 E9 043A EA 043B EB 043C EC 043D ED 043E EE 043F EF 0440 F0 0441 F1 0442 F2 0443 F3 0444 F4 0445 F5 0446 F6 0447 F7 0448 F8 0449 F9 044A FA 044B FB 044C FC 044D FD 044E FE 044F FF 0451 B8 0452 90 0453 83 0454 BA 0455 BE 0456 B3 0457 BF 0458 BC 0459 9A 045A 9C 045B 9E 045C 9D 045E A2 045F 9F 0490 A5 0491 B4 2013 96 2014 97 2018 91 2019 92 201A 82 201C 93 201D 94 201E 84 2020 86 2021 87 2022 95 2026 85 2030 89 2039 8B 203A 9B 20AC 88 2116 B9 2122 99} +} -result {} + +test encoding-convertto-ucmCompare-cp1251 {Compare against ICU UCM} -body { + ucmConverttoMismatches cp1251 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0024 24 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007E 7E 007F 7F 00A0 A0 00A4 A4 00A6 A6 00A7 A7 00A9 A9 00AB AB 00AC AC 00AD AD 00AE AE 00B0 B0 00B1 B1 00B5 B5 00B6 B6 00B7 B7 00BB BB 0401 A8 0402 80 0403 81 0404 AA 0405 BD 0406 B2 0407 AF 0408 A3 0409 8A 040A 8C 040B 8E 040C 8D 040E A1 040F 8F 0410 C0 0411 C1 0412 C2 0413 C3 0414 C4 0415 C5 0416 C6 0417 C7 0418 C8 0419 C9 041A CA 041B CB 041C CC 041D CD 041E CE 041F CF 0420 D0 0421 D1 0422 D2 0423 D3 0424 D4 0425 D5 0426 D6 0427 D7 0428 D8 0429 D9 042A DA 042B DB 042C DC 042D DD 042E DE 042F DF 0430 E0 0431 E1 0432 E2 0433 E3 0434 E4 0435 E5 0436 E6 0437 E7 0438 E8 0439 E9 043A EA 043B EB 043C EC 043D ED 043E EE 043F EF 0440 F0 0441 F1 0442 F2 0443 F3 0444 F4 0445 F5 0446 F6 0447 F7 0448 F8 0449 F9 044A FA 044B FB 044C FC 044D FD 044E FE 044F FF 0451 B8 0452 90 0453 83 0454 BA 0455 BE 0456 B3 0457 BF 0458 BC 0459 9A 045A 9C 045B 9E 045C 9D 045E A2 045F 9F 0490 A5 0491 B4 2013 96 2014 97 2018 91 2019 92 201A 82 201C 93 201D 94 201E 84 2020 86 2021 87 2022 95 2026 85 2030 89 2039 8B 203A 9B 20AC 88 2116 B9 2122 99} +} -result {} + +# cp1251 - invalid byte sequences +lappend encInvalidBytes {*}{ + cp1251 98 tcl8 \U00000098 -1 {} {} + cp1251 98 replace \uFFFD -1 {} {} + cp1251 98 strict {} 0 {} {} +}; # cp1251 + +# cp1251 - invalid byte sequences +lappend encUnencodableStrings {*}{ + cp1251 \U00000080 tcl8 1A -1 {} {} + cp1251 \U00000080 replace 1A -1 {} {} + cp1251 \U00000080 strict {} 0 {} {} + cp1251 \U00000400 tcl8 1A -1 {} {} + cp1251 \U00000400 replace 1A -1 {} {} + cp1251 \U00000400 strict {} 0 {} {} + cp1251 \U0000D800 tcl8 1A -1 {} {} + cp1251 \U0000D800 replace 1A -1 {} {} + cp1251 \U0000D800 strict {} 0 {} {} + cp1251 \U0000DC00 tcl8 1A -1 {} {} + cp1251 \U0000DC00 replace 1A -1 {} {} + cp1251 \U0000DC00 strict {} 0 {} {} + cp1251 \U00010000 tcl8 1A -1 {} {} + cp1251 \U00010000 replace 1A -1 {} {} + cp1251 \U00010000 strict {} 0 {} {} + cp1251 \U0010FFFF tcl8 1A -1 {} {} + cp1251 \U0010FFFF replace 1A -1 {} {} + cp1251 \U0010FFFF strict {} 0 {} {} +}; # cp1251 + +# +# cp1252 (generated from glibc-CP1252-2.1.2) + +test encoding-convertfrom-ucmCompare-cp1252 {Compare against ICU UCM} -body { + ucmConvertfromMismatches cp1252 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0024 24 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007E 7E 007F 7F 00A0 A0 00A1 A1 00A2 A2 00A3 A3 00A4 A4 00A5 A5 00A6 A6 00A7 A7 00A8 A8 00A9 A9 00AA AA 00AB AB 00AC AC 00AD AD 00AE AE 00AF AF 00B0 B0 00B1 B1 00B2 B2 00B3 B3 00B4 B4 00B5 B5 00B6 B6 00B7 B7 00B8 B8 00B9 B9 00BA BA 00BB BB 00BC BC 00BD BD 00BE BE 00BF BF 00C0 C0 00C1 C1 00C2 C2 00C3 C3 00C4 C4 00C5 C5 00C6 C6 00C7 C7 00C8 C8 00C9 C9 00CA CA 00CB CB 00CC CC 00CD CD 00CE CE 00CF CF 00D0 D0 00D1 D1 00D2 D2 00D3 D3 00D4 D4 00D5 D5 00D6 D6 00D7 D7 00D8 D8 00D9 D9 00DA DA 00DB DB 00DC DC 00DD DD 00DE DE 00DF DF 00E0 E0 00E1 E1 00E2 E2 00E3 E3 00E4 E4 00E5 E5 00E6 E6 00E7 E7 00E8 E8 00E9 E9 00EA EA 00EB EB 00EC EC 00ED ED 00EE EE 00EF EF 00F0 F0 00F1 F1 00F2 F2 00F3 F3 00F4 F4 00F5 F5 00F6 F6 00F7 F7 00F8 F8 00F9 F9 00FA FA 00FB FB 00FC FC 00FD FD 00FE FE 00FF FF 0152 8C 0153 9C 0160 8A 0161 9A 0178 9F 017D 8E 017E 9E 0192 83 02C6 88 02DC 98 2013 96 2014 97 2018 91 2019 92 201A 82 201C 93 201D 94 201E 84 2020 86 2021 87 2022 95 2026 85 2030 89 2039 8B 203A 9B 20AC 80 2122 99} +} -result {} + +test encoding-convertto-ucmCompare-cp1252 {Compare against ICU UCM} -body { + ucmConverttoMismatches cp1252 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0024 24 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007E 7E 007F 7F 00A0 A0 00A1 A1 00A2 A2 00A3 A3 00A4 A4 00A5 A5 00A6 A6 00A7 A7 00A8 A8 00A9 A9 00AA AA 00AB AB 00AC AC 00AD AD 00AE AE 00AF AF 00B0 B0 00B1 B1 00B2 B2 00B3 B3 00B4 B4 00B5 B5 00B6 B6 00B7 B7 00B8 B8 00B9 B9 00BA BA 00BB BB 00BC BC 00BD BD 00BE BE 00BF BF 00C0 C0 00C1 C1 00C2 C2 00C3 C3 00C4 C4 00C5 C5 00C6 C6 00C7 C7 00C8 C8 00C9 C9 00CA CA 00CB CB 00CC CC 00CD CD 00CE CE 00CF CF 00D0 D0 00D1 D1 00D2 D2 00D3 D3 00D4 D4 00D5 D5 00D6 D6 00D7 D7 00D8 D8 00D9 D9 00DA DA 00DB DB 00DC DC 00DD DD 00DE DE 00DF DF 00E0 E0 00E1 E1 00E2 E2 00E3 E3 00E4 E4 00E5 E5 00E6 E6 00E7 E7 00E8 E8 00E9 E9 00EA EA 00EB EB 00EC EC 00ED ED 00EE EE 00EF EF 00F0 F0 00F1 F1 00F2 F2 00F3 F3 00F4 F4 00F5 F5 00F6 F6 00F7 F7 00F8 F8 00F9 F9 00FA FA 00FB FB 00FC FC 00FD FD 00FE FE 00FF FF 0152 8C 0153 9C 0160 8A 0161 9A 0178 9F 017D 8E 017E 9E 0192 83 02C6 88 02DC 98 2013 96 2014 97 2018 91 2019 92 201A 82 201C 93 201D 94 201E 84 2020 86 2021 87 2022 95 2026 85 2030 89 2039 8B 203A 9B 20AC 80 2122 99} +} -result {} + +# cp1252 - invalid byte sequences +lappend encInvalidBytes {*}{ + cp1252 81 tcl8 \U00000081 -1 {} {} + cp1252 81 replace \uFFFD -1 {} {} + cp1252 81 strict {} 0 {} {} + cp1252 8D tcl8 \U0000008D -1 {} {} + cp1252 8D replace \uFFFD -1 {} {} + cp1252 8D strict {} 0 {} {} + cp1252 8F tcl8 \U0000008F -1 {} {} + cp1252 8F replace \uFFFD -1 {} {} + cp1252 8F strict {} 0 {} {} + cp1252 90 tcl8 \U00000090 -1 {} {} + cp1252 90 replace \uFFFD -1 {} {} + cp1252 90 strict {} 0 {} {} + cp1252 9D tcl8 \U0000009D -1 {} {} + cp1252 9D replace \uFFFD -1 {} {} + cp1252 9D strict {} 0 {} {} +}; # cp1252 + +# cp1252 - invalid byte sequences +lappend encUnencodableStrings {*}{ + cp1252 \U00000080 tcl8 1A -1 {} {} + cp1252 \U00000080 replace 1A -1 {} {} + cp1252 \U00000080 strict {} 0 {} {} + cp1252 \U00000400 tcl8 1A -1 {} {} + cp1252 \U00000400 replace 1A -1 {} {} + cp1252 \U00000400 strict {} 0 {} {} + cp1252 \U0000D800 tcl8 1A -1 {} {} + cp1252 \U0000D800 replace 1A -1 {} {} + cp1252 \U0000D800 strict {} 0 {} {} + cp1252 \U0000DC00 tcl8 1A -1 {} {} + cp1252 \U0000DC00 replace 1A -1 {} {} + cp1252 \U0000DC00 strict {} 0 {} {} + cp1252 \U00010000 tcl8 1A -1 {} {} + cp1252 \U00010000 replace 1A -1 {} {} + cp1252 \U00010000 strict {} 0 {} {} + cp1252 \U0010FFFF tcl8 1A -1 {} {} + cp1252 \U0010FFFF replace 1A -1 {} {} + cp1252 \U0010FFFF strict {} 0 {} {} +}; # cp1252 + +# +# cp1253 (generated from glibc-CP1253-2.1.2) + +test encoding-convertfrom-ucmCompare-cp1253 {Compare against ICU UCM} -body { + ucmConvertfromMismatches cp1253 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0024 24 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007E 7E 007F 7F 00A0 A0 00A3 A3 00A4 A4 00A5 A5 00A6 A6 00A7 A7 00A8 A8 00A9 A9 00AB AB 00AC AC 00AD AD 00AE AE 00B0 B0 00B1 B1 00B2 B2 00B3 B3 00B5 B5 00B6 B6 00B7 B7 00BB BB 00BD BD 0192 83 0384 B4 0385 A1 0386 A2 0388 B8 0389 B9 038A BA 038C BC 038E BE 038F BF 0390 C0 0391 C1 0392 C2 0393 C3 0394 C4 0395 C5 0396 C6 0397 C7 0398 C8 0399 C9 039A CA 039B CB 039C CC 039D CD 039E CE 039F CF 03A0 D0 03A1 D1 03A3 D3 03A4 D4 03A5 D5 03A6 D6 03A7 D7 03A8 D8 03A9 D9 03AA DA 03AB DB 03AC DC 03AD DD 03AE DE 03AF DF 03B0 E0 03B1 E1 03B2 E2 03B3 E3 03B4 E4 03B5 E5 03B6 E6 03B7 E7 03B8 E8 03B9 E9 03BA EA 03BB EB 03BC EC 03BD ED 03BE EE 03BF EF 03C0 F0 03C1 F1 03C2 F2 03C3 F3 03C4 F4 03C5 F5 03C6 F6 03C7 F7 03C8 F8 03C9 F9 03CA FA 03CB FB 03CC FC 03CD FD 03CE FE 2013 96 2014 97 2015 AF 2018 91 2019 92 201A 82 201C 93 201D 94 201E 84 2020 86 2021 87 2022 95 2026 85 2030 89 2039 8B 203A 9B 20AC 80 2122 99} +} -result {} + +test encoding-convertto-ucmCompare-cp1253 {Compare against ICU UCM} -body { + ucmConverttoMismatches cp1253 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0024 24 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007E 7E 007F 7F 00A0 A0 00A3 A3 00A4 A4 00A5 A5 00A6 A6 00A7 A7 00A8 A8 00A9 A9 00AB AB 00AC AC 00AD AD 00AE AE 00B0 B0 00B1 B1 00B2 B2 00B3 B3 00B5 B5 00B6 B6 00B7 B7 00BB BB 00BD BD 0192 83 0384 B4 0385 A1 0386 A2 0388 B8 0389 B9 038A BA 038C BC 038E BE 038F BF 0390 C0 0391 C1 0392 C2 0393 C3 0394 C4 0395 C5 0396 C6 0397 C7 0398 C8 0399 C9 039A CA 039B CB 039C CC 039D CD 039E CE 039F CF 03A0 D0 03A1 D1 03A3 D3 03A4 D4 03A5 D5 03A6 D6 03A7 D7 03A8 D8 03A9 D9 03AA DA 03AB DB 03AC DC 03AD DD 03AE DE 03AF DF 03B0 E0 03B1 E1 03B2 E2 03B3 E3 03B4 E4 03B5 E5 03B6 E6 03B7 E7 03B8 E8 03B9 E9 03BA EA 03BB EB 03BC EC 03BD ED 03BE EE 03BF EF 03C0 F0 03C1 F1 03C2 F2 03C3 F3 03C4 F4 03C5 F5 03C6 F6 03C7 F7 03C8 F8 03C9 F9 03CA FA 03CB FB 03CC FC 03CD FD 03CE FE 2013 96 2014 97 2015 AF 2018 91 2019 92 201A 82 201C 93 201D 94 201E 84 2020 86 2021 87 2022 95 2026 85 2030 89 2039 8B 203A 9B 20AC 80 2122 99} +} -result {} + +# cp1253 - invalid byte sequences +lappend encInvalidBytes {*}{ + cp1253 81 tcl8 \U00000081 -1 {} {} + cp1253 81 replace \uFFFD -1 {} {} + cp1253 81 strict {} 0 {} {} + cp1253 88 tcl8 \U00000088 -1 {} {} + cp1253 88 replace \uFFFD -1 {} {} + cp1253 88 strict {} 0 {} {} + cp1253 8A tcl8 \U0000008A -1 {} {} + cp1253 8A replace \uFFFD -1 {} {} + cp1253 8A strict {} 0 {} {} + cp1253 8C tcl8 \U0000008C -1 {} {} + cp1253 8C replace \uFFFD -1 {} {} + cp1253 8C strict {} 0 {} {} + cp1253 8D tcl8 \U0000008D -1 {} {} + cp1253 8D replace \uFFFD -1 {} {} + cp1253 8D strict {} 0 {} {} + cp1253 8E tcl8 \U0000008E -1 {} {} + cp1253 8E replace \uFFFD -1 {} {} + cp1253 8E strict {} 0 {} {} + cp1253 8F tcl8 \U0000008F -1 {} {} + cp1253 8F replace \uFFFD -1 {} {} + cp1253 8F strict {} 0 {} {} + cp1253 90 tcl8 \U00000090 -1 {} {} + cp1253 90 replace \uFFFD -1 {} {} + cp1253 90 strict {} 0 {} {} + cp1253 98 tcl8 \U00000098 -1 {} {} + cp1253 98 replace \uFFFD -1 {} {} + cp1253 98 strict {} 0 {} {} + cp1253 9A tcl8 \U0000009A -1 {} {} + cp1253 9A replace \uFFFD -1 {} {} + cp1253 9A strict {} 0 {} {} + cp1253 9C tcl8 \U0000009C -1 {} {} + cp1253 9C replace \uFFFD -1 {} {} + cp1253 9C strict {} 0 {} {} + cp1253 9D tcl8 \U0000009D -1 {} {} + cp1253 9D replace \uFFFD -1 {} {} + cp1253 9D strict {} 0 {} {} + cp1253 9E tcl8 \U0000009E -1 {} {} + cp1253 9E replace \uFFFD -1 {} {} + cp1253 9E strict {} 0 {} {} + cp1253 9F tcl8 \U0000009F -1 {} {} + cp1253 9F replace \uFFFD -1 {} {} + cp1253 9F strict {} 0 {} {} + cp1253 AA tcl8 \U000000AA -1 {} {} + cp1253 AA replace \uFFFD -1 {} {} + cp1253 AA strict {} 0 {} {} + cp1253 D2 tcl8 \U000000D2 -1 {} {} + cp1253 D2 replace \uFFFD -1 {} {} + cp1253 D2 strict {} 0 {} {} + cp1253 FF tcl8 \U000000FF -1 {} {} + cp1253 FF replace \uFFFD -1 {} {} + cp1253 FF strict {} 0 {} {} +}; # cp1253 + +# cp1253 - invalid byte sequences +lappend encUnencodableStrings {*}{ + cp1253 \U00000080 tcl8 1A -1 {} {} + cp1253 \U00000080 replace 1A -1 {} {} + cp1253 \U00000080 strict {} 0 {} {} + cp1253 \U00000400 tcl8 1A -1 {} {} + cp1253 \U00000400 replace 1A -1 {} {} + cp1253 \U00000400 strict {} 0 {} {} + cp1253 \U0000D800 tcl8 1A -1 {} {} + cp1253 \U0000D800 replace 1A -1 {} {} + cp1253 \U0000D800 strict {} 0 {} {} + cp1253 \U0000DC00 tcl8 1A -1 {} {} + cp1253 \U0000DC00 replace 1A -1 {} {} + cp1253 \U0000DC00 strict {} 0 {} {} + cp1253 \U00010000 tcl8 1A -1 {} {} + cp1253 \U00010000 replace 1A -1 {} {} + cp1253 \U00010000 strict {} 0 {} {} + cp1253 \U0010FFFF tcl8 1A -1 {} {} + cp1253 \U0010FFFF replace 1A -1 {} {} + cp1253 \U0010FFFF strict {} 0 {} {} +}; # cp1253 + +# +# cp1254 (generated from glibc-CP1254-2.1.2) + +test encoding-convertfrom-ucmCompare-cp1254 {Compare against ICU UCM} -body { + ucmConvertfromMismatches cp1254 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0024 24 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007E 7E 007F 7F 00A0 A0 00A1 A1 00A2 A2 00A3 A3 00A4 A4 00A5 A5 00A6 A6 00A7 A7 00A8 A8 00A9 A9 00AA AA 00AB AB 00AC AC 00AD AD 00AE AE 00AF AF 00B0 B0 00B1 B1 00B2 B2 00B3 B3 00B4 B4 00B5 B5 00B6 B6 00B7 B7 00B8 B8 00B9 B9 00BA BA 00BB BB 00BC BC 00BD BD 00BE BE 00BF BF 00C0 C0 00C1 C1 00C2 C2 00C3 C3 00C4 C4 00C5 C5 00C6 C6 00C7 C7 00C8 C8 00C9 C9 00CA CA 00CB CB 00CC CC 00CD CD 00CE CE 00CF CF 00D1 D1 00D2 D2 00D3 D3 00D4 D4 00D5 D5 00D6 D6 00D7 D7 00D8 D8 00D9 D9 00DA DA 00DB DB 00DC DC 00DF DF 00E0 E0 00E1 E1 00E2 E2 00E3 E3 00E4 E4 00E5 E5 00E6 E6 00E7 E7 00E8 E8 00E9 E9 00EA EA 00EB EB 00EC EC 00ED ED 00EE EE 00EF EF 00F1 F1 00F2 F2 00F3 F3 00F4 F4 00F5 F5 00F6 F6 00F7 F7 00F8 F8 00F9 F9 00FA FA 00FB FB 00FC FC 00FF FF 011E D0 011F F0 0130 DD 0131 FD 0152 8C 0153 9C 015E DE 015F FE 0160 8A 0161 9A 0178 9F 0192 83 02C6 88 02DC 98 2013 96 2014 97 2018 91 2019 92 201A 82 201C 93 201D 94 201E 84 2020 86 2021 87 2022 95 2026 85 2030 89 2039 8B 203A 9B 20AC 80 2122 99} +} -result {} + +test encoding-convertto-ucmCompare-cp1254 {Compare against ICU UCM} -body { + ucmConverttoMismatches cp1254 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0024 24 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007E 7E 007F 7F 00A0 A0 00A1 A1 00A2 A2 00A3 A3 00A4 A4 00A5 A5 00A6 A6 00A7 A7 00A8 A8 00A9 A9 00AA AA 00AB AB 00AC AC 00AD AD 00AE AE 00AF AF 00B0 B0 00B1 B1 00B2 B2 00B3 B3 00B4 B4 00B5 B5 00B6 B6 00B7 B7 00B8 B8 00B9 B9 00BA BA 00BB BB 00BC BC 00BD BD 00BE BE 00BF BF 00C0 C0 00C1 C1 00C2 C2 00C3 C3 00C4 C4 00C5 C5 00C6 C6 00C7 C7 00C8 C8 00C9 C9 00CA CA 00CB CB 00CC CC 00CD CD 00CE CE 00CF CF 00D1 D1 00D2 D2 00D3 D3 00D4 D4 00D5 D5 00D6 D6 00D7 D7 00D8 D8 00D9 D9 00DA DA 00DB DB 00DC DC 00DF DF 00E0 E0 00E1 E1 00E2 E2 00E3 E3 00E4 E4 00E5 E5 00E6 E6 00E7 E7 00E8 E8 00E9 E9 00EA EA 00EB EB 00EC EC 00ED ED 00EE EE 00EF EF 00F1 F1 00F2 F2 00F3 F3 00F4 F4 00F5 F5 00F6 F6 00F7 F7 00F8 F8 00F9 F9 00FA FA 00FB FB 00FC FC 00FF FF 011E D0 011F F0 0130 DD 0131 FD 0152 8C 0153 9C 015E DE 015F FE 0160 8A 0161 9A 0178 9F 0192 83 02C6 88 02DC 98 2013 96 2014 97 2018 91 2019 92 201A 82 201C 93 201D 94 201E 84 2020 86 2021 87 2022 95 2026 85 2030 89 2039 8B 203A 9B 20AC 80 2122 99} +} -result {} + +# cp1254 - invalid byte sequences +lappend encInvalidBytes {*}{ + cp1254 81 tcl8 \U00000081 -1 {} {} + cp1254 81 replace \uFFFD -1 {} {} + cp1254 81 strict {} 0 {} {} + cp1254 8D tcl8 \U0000008D -1 {} {} + cp1254 8D replace \uFFFD -1 {} {} + cp1254 8D strict {} 0 {} {} + cp1254 8E tcl8 \U0000008E -1 {} {} + cp1254 8E replace \uFFFD -1 {} {} + cp1254 8E strict {} 0 {} {} + cp1254 8F tcl8 \U0000008F -1 {} {} + cp1254 8F replace \uFFFD -1 {} {} + cp1254 8F strict {} 0 {} {} + cp1254 90 tcl8 \U00000090 -1 {} {} + cp1254 90 replace \uFFFD -1 {} {} + cp1254 90 strict {} 0 {} {} + cp1254 9D tcl8 \U0000009D -1 {} {} + cp1254 9D replace \uFFFD -1 {} {} + cp1254 9D strict {} 0 {} {} + cp1254 9E tcl8 \U0000009E -1 {} {} + cp1254 9E replace \uFFFD -1 {} {} + cp1254 9E strict {} 0 {} {} +}; # cp1254 + +# cp1254 - invalid byte sequences +lappend encUnencodableStrings {*}{ + cp1254 \U00000080 tcl8 1A -1 {} {} + cp1254 \U00000080 replace 1A -1 {} {} + cp1254 \U00000080 strict {} 0 {} {} + cp1254 \U00000400 tcl8 1A -1 {} {} + cp1254 \U00000400 replace 1A -1 {} {} + cp1254 \U00000400 strict {} 0 {} {} + cp1254 \U0000D800 tcl8 1A -1 {} {} + cp1254 \U0000D800 replace 1A -1 {} {} + cp1254 \U0000D800 strict {} 0 {} {} + cp1254 \U0000DC00 tcl8 1A -1 {} {} + cp1254 \U0000DC00 replace 1A -1 {} {} + cp1254 \U0000DC00 strict {} 0 {} {} + cp1254 \U00010000 tcl8 1A -1 {} {} + cp1254 \U00010000 replace 1A -1 {} {} + cp1254 \U00010000 strict {} 0 {} {} + cp1254 \U0010FFFF tcl8 1A -1 {} {} + cp1254 \U0010FFFF replace 1A -1 {} {} + cp1254 \U0010FFFF strict {} 0 {} {} +}; # cp1254 + +# +# cp1255 (generated from glibc-CP1255-2.1.2) + +test encoding-convertfrom-ucmCompare-cp1255 {Compare against ICU UCM} -body { + ucmConvertfromMismatches cp1255 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0024 24 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007E 7E 007F 7F 00A0 A0 00A1 A1 00A2 A2 00A3 A3 00A5 A5 00A6 A6 00A7 A7 00A8 A8 00A9 A9 00AB AB 00AC AC 00AD AD 00AE AE 00AF AF 00B0 B0 00B1 B1 00B2 B2 00B3 B3 00B4 B4 00B5 B5 00B6 B6 00B7 B7 00B8 B8 00B9 B9 00BB BB 00BC BC 00BD BD 00BE BE 00BF BF 00D7 AA 00F7 BA 0192 83 02C6 88 02DC 98 05B0 C0 05B1 C1 05B2 C2 05B3 C3 05B4 C4 05B5 C5 05B6 C6 05B7 C7 05B8 C8 05B9 C9 05BB CB 05BC CC 05BD CD 05BE CE 05BF CF 05C0 D0 05C1 D1 05C2 D2 05C3 D3 05D0 E0 05D1 E1 05D2 E2 05D3 E3 05D4 E4 05D5 E5 05D6 E6 05D7 E7 05D8 E8 05D9 E9 05DA EA 05DB EB 05DC EC 05DD ED 05DE EE 05DF EF 05E0 F0 05E1 F1 05E2 F2 05E3 F3 05E4 F4 05E5 F5 05E6 F6 05E7 F7 05E8 F8 05E9 F9 05EA FA 05F0 D4 05F1 D5 05F2 D6 05F3 D7 05F4 D8 200E FD 200F FE 2013 96 2014 97 2018 91 2019 92 201A 82 201C 93 201D 94 201E 84 2020 86 2021 87 2022 95 2026 85 2030 89 2039 8B 203A 9B 20AA A4 20AC 80 2122 99} +} -result {} + +test encoding-convertto-ucmCompare-cp1255 {Compare against ICU UCM} -body { + ucmConverttoMismatches cp1255 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0024 24 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007E 7E 007F 7F 00A0 A0 00A1 A1 00A2 A2 00A3 A3 00A5 A5 00A6 A6 00A7 A7 00A8 A8 00A9 A9 00AB AB 00AC AC 00AD AD 00AE AE 00AF AF 00B0 B0 00B1 B1 00B2 B2 00B3 B3 00B4 B4 00B5 B5 00B6 B6 00B7 B7 00B8 B8 00B9 B9 00BB BB 00BC BC 00BD BD 00BE BE 00BF BF 00D7 AA 00F7 BA 0192 83 02C6 88 02DC 98 05B0 C0 05B1 C1 05B2 C2 05B3 C3 05B4 C4 05B5 C5 05B6 C6 05B7 C7 05B8 C8 05B9 C9 05BB CB 05BC CC 05BD CD 05BE CE 05BF CF 05C0 D0 05C1 D1 05C2 D2 05C3 D3 05D0 E0 05D1 E1 05D2 E2 05D3 E3 05D4 E4 05D5 E5 05D6 E6 05D7 E7 05D8 E8 05D9 E9 05DA EA 05DB EB 05DC EC 05DD ED 05DE EE 05DF EF 05E0 F0 05E1 F1 05E2 F2 05E3 F3 05E4 F4 05E5 F5 05E6 F6 05E7 F7 05E8 F8 05E9 F9 05EA FA 05F0 D4 05F1 D5 05F2 D6 05F3 D7 05F4 D8 200E FD 200F FE 2013 96 2014 97 2018 91 2019 92 201A 82 201C 93 201D 94 201E 84 2020 86 2021 87 2022 95 2026 85 2030 89 2039 8B 203A 9B 20AA A4 20AC 80 2122 99} +} -result {} + +# cp1255 - invalid byte sequences +lappend encInvalidBytes {*}{ + cp1255 81 tcl8 \U00000081 -1 {} {} + cp1255 81 replace \uFFFD -1 {} {} + cp1255 81 strict {} 0 {} {} + cp1255 8A tcl8 \U0000008A -1 {} {} + cp1255 8A replace \uFFFD -1 {} {} + cp1255 8A strict {} 0 {} {} + cp1255 8C tcl8 \U0000008C -1 {} {} + cp1255 8C replace \uFFFD -1 {} {} + cp1255 8C strict {} 0 {} {} + cp1255 8D tcl8 \U0000008D -1 {} {} + cp1255 8D replace \uFFFD -1 {} {} + cp1255 8D strict {} 0 {} {} + cp1255 8E tcl8 \U0000008E -1 {} {} + cp1255 8E replace \uFFFD -1 {} {} + cp1255 8E strict {} 0 {} {} + cp1255 8F tcl8 \U0000008F -1 {} {} + cp1255 8F replace \uFFFD -1 {} {} + cp1255 8F strict {} 0 {} {} + cp1255 90 tcl8 \U00000090 -1 {} {} + cp1255 90 replace \uFFFD -1 {} {} + cp1255 90 strict {} 0 {} {} + cp1255 9A tcl8 \U0000009A -1 {} {} + cp1255 9A replace \uFFFD -1 {} {} + cp1255 9A strict {} 0 {} {} + cp1255 9C tcl8 \U0000009C -1 {} {} + cp1255 9C replace \uFFFD -1 {} {} + cp1255 9C strict {} 0 {} {} + cp1255 9D tcl8 \U0000009D -1 {} {} + cp1255 9D replace \uFFFD -1 {} {} + cp1255 9D strict {} 0 {} {} + cp1255 9E tcl8 \U0000009E -1 {} {} + cp1255 9E replace \uFFFD -1 {} {} + cp1255 9E strict {} 0 {} {} + cp1255 9F tcl8 \U0000009F -1 {} {} + cp1255 9F replace \uFFFD -1 {} {} + cp1255 9F strict {} 0 {} {} + cp1255 CA tcl8 \U000000CA -1 {} {} + cp1255 CA replace \uFFFD -1 {} {} + cp1255 CA strict {} 0 {} {} + cp1255 D9 tcl8 \U000000D9 -1 {} {} + cp1255 D9 replace \uFFFD -1 {} {} + cp1255 D9 strict {} 0 {} {} + cp1255 DA tcl8 \U000000DA -1 {} {} + cp1255 DA replace \uFFFD -1 {} {} + cp1255 DA strict {} 0 {} {} + cp1255 DB tcl8 \U000000DB -1 {} {} + cp1255 DB replace \uFFFD -1 {} {} + cp1255 DB strict {} 0 {} {} + cp1255 DC tcl8 \U000000DC -1 {} {} + cp1255 DC replace \uFFFD -1 {} {} + cp1255 DC strict {} 0 {} {} + cp1255 DD tcl8 \U000000DD -1 {} {} + cp1255 DD replace \uFFFD -1 {} {} + cp1255 DD strict {} 0 {} {} + cp1255 DE tcl8 \U000000DE -1 {} {} + cp1255 DE replace \uFFFD -1 {} {} + cp1255 DE strict {} 0 {} {} + cp1255 DF tcl8 \U000000DF -1 {} {} + cp1255 DF replace \uFFFD -1 {} {} + cp1255 DF strict {} 0 {} {} + cp1255 FB tcl8 \U000000FB -1 {} {} + cp1255 FB replace \uFFFD -1 {} {} + cp1255 FB strict {} 0 {} {} + cp1255 FC tcl8 \U000000FC -1 {} {} + cp1255 FC replace \uFFFD -1 {} {} + cp1255 FC strict {} 0 {} {} + cp1255 FF tcl8 \U000000FF -1 {} {} + cp1255 FF replace \uFFFD -1 {} {} + cp1255 FF strict {} 0 {} {} +}; # cp1255 + +# cp1255 - invalid byte sequences +lappend encUnencodableStrings {*}{ + cp1255 \U00000080 tcl8 1A -1 {} {} + cp1255 \U00000080 replace 1A -1 {} {} + cp1255 \U00000080 strict {} 0 {} {} + cp1255 \U00000400 tcl8 1A -1 {} {} + cp1255 \U00000400 replace 1A -1 {} {} + cp1255 \U00000400 strict {} 0 {} {} + cp1255 \U0000D800 tcl8 1A -1 {} {} + cp1255 \U0000D800 replace 1A -1 {} {} + cp1255 \U0000D800 strict {} 0 {} {} + cp1255 \U0000DC00 tcl8 1A -1 {} {} + cp1255 \U0000DC00 replace 1A -1 {} {} + cp1255 \U0000DC00 strict {} 0 {} {} + cp1255 \U00010000 tcl8 1A -1 {} {} + cp1255 \U00010000 replace 1A -1 {} {} + cp1255 \U00010000 strict {} 0 {} {} + cp1255 \U0010FFFF tcl8 1A -1 {} {} + cp1255 \U0010FFFF replace 1A -1 {} {} + cp1255 \U0010FFFF strict {} 0 {} {} +}; # cp1255 + +# +# cp1256 (generated from glibc-CP1256-2.1.2) + +test encoding-convertfrom-ucmCompare-cp1256 {Compare against ICU UCM} -body { + ucmConvertfromMismatches cp1256 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0024 24 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007E 7E 007F 7F 00A0 A0 00A2 A2 00A3 A3 00A4 A4 00A5 A5 00A6 A6 00A7 A7 00A8 A8 00A9 A9 00AB AB 00AC AC 00AD AD 00AE AE 00AF AF 00B0 B0 00B1 B1 00B2 B2 00B3 B3 00B4 B4 00B5 B5 00B6 B6 00B7 B7 00B8 B8 00B9 B9 00BB BB 00BC BC 00BD BD 00BE BE 00D7 D7 00E0 E0 00E2 E2 00E7 E7 00E8 E8 00E9 E9 00EA EA 00EB EB 00EE EE 00EF EF 00F4 F4 00F7 F7 00F9 F9 00FB FB 00FC FC 0152 8C 0153 9C 0192 83 02C6 88 060C A1 061B BA 061F BF 0621 C1 0622 C2 0623 C3 0624 C4 0625 C5 0626 C6 0627 C7 0628 C8 0629 C9 062A CA 062B CB 062C CC 062D CD 062E CE 062F CF 0630 D0 0631 D1 0632 D2 0633 D3 0634 D4 0635 D5 0636 D6 0637 D8 0638 D9 0639 DA 063A DB 0640 DC 0641 DD 0642 DE 0643 DF 0644 E1 0645 E3 0646 E4 0647 E5 0648 E6 0649 EC 064A ED 064B F0 064C F1 064D F2 064E F3 064F F5 0650 F6 0651 F8 0652 FA 0679 8A 067E 81 0686 8D 0688 8F 0691 9A 0698 8E 06A9 98 06AF 90 06BA 9F 06BE AA 06C1 C0 06D2 FF 200C 9D 200D 9E 200E FD 200F FE 2013 96 2014 97 2018 91 2019 92 201A 82 201C 93 201D 94 201E 84 2020 86 2021 87 2022 95 2026 85 2030 89 2039 8B 203A 9B 20AC 80 2122 99} +} -result {} + +test encoding-convertto-ucmCompare-cp1256 {Compare against ICU UCM} -body { + ucmConverttoMismatches cp1256 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0024 24 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007E 7E 007F 7F 00A0 A0 00A2 A2 00A3 A3 00A4 A4 00A5 A5 00A6 A6 00A7 A7 00A8 A8 00A9 A9 00AB AB 00AC AC 00AD AD 00AE AE 00AF AF 00B0 B0 00B1 B1 00B2 B2 00B3 B3 00B4 B4 00B5 B5 00B6 B6 00B7 B7 00B8 B8 00B9 B9 00BB BB 00BC BC 00BD BD 00BE BE 00D7 D7 00E0 E0 00E2 E2 00E7 E7 00E8 E8 00E9 E9 00EA EA 00EB EB 00EE EE 00EF EF 00F4 F4 00F7 F7 00F9 F9 00FB FB 00FC FC 0152 8C 0153 9C 0192 83 02C6 88 060C A1 061B BA 061F BF 0621 C1 0622 C2 0623 C3 0624 C4 0625 C5 0626 C6 0627 C7 0628 C8 0629 C9 062A CA 062B CB 062C CC 062D CD 062E CE 062F CF 0630 D0 0631 D1 0632 D2 0633 D3 0634 D4 0635 D5 0636 D6 0637 D8 0638 D9 0639 DA 063A DB 0640 DC 0641 DD 0642 DE 0643 DF 0644 E1 0645 E3 0646 E4 0647 E5 0648 E6 0649 EC 064A ED 064B F0 064C F1 064D F2 064E F3 064F F5 0650 F6 0651 F8 0652 FA 0679 8A 067E 81 0686 8D 0688 8F 0691 9A 0698 8E 06A9 98 06AF 90 06BA 9F 06BE AA 06C1 C0 06D2 FF 200C 9D 200D 9E 200E FD 200F FE 2013 96 2014 97 2018 91 2019 92 201A 82 201C 93 201D 94 201E 84 2020 86 2021 87 2022 95 2026 85 2030 89 2039 8B 203A 9B 20AC 80 2122 99} +} -result {} + +# cp1256 - invalid byte sequences +lappend encInvalidBytes {*}{ +}; # cp1256 + +# cp1256 - invalid byte sequences +lappend encUnencodableStrings {*}{ + cp1256 \U00000080 tcl8 1A -1 {} {} + cp1256 \U00000080 replace 1A -1 {} {} + cp1256 \U00000080 strict {} 0 {} {} + cp1256 \U00000400 tcl8 1A -1 {} {} + cp1256 \U00000400 replace 1A -1 {} {} + cp1256 \U00000400 strict {} 0 {} {} + cp1256 \U0000D800 tcl8 1A -1 {} {} + cp1256 \U0000D800 replace 1A -1 {} {} + cp1256 \U0000D800 strict {} 0 {} {} + cp1256 \U0000DC00 tcl8 1A -1 {} {} + cp1256 \U0000DC00 replace 1A -1 {} {} + cp1256 \U0000DC00 strict {} 0 {} {} + cp1256 \U00010000 tcl8 1A -1 {} {} + cp1256 \U00010000 replace 1A -1 {} {} + cp1256 \U00010000 strict {} 0 {} {} + cp1256 \U0010FFFF tcl8 1A -1 {} {} + cp1256 \U0010FFFF replace 1A -1 {} {} + cp1256 \U0010FFFF strict {} 0 {} {} +}; # cp1256 + +# +# cp1257 (generated from glibc-CP1257-2.1.2) + +test encoding-convertfrom-ucmCompare-cp1257 {Compare against ICU UCM} -body { + ucmConvertfromMismatches cp1257 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0024 24 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007E 7E 007F 7F 00A0 A0 00A2 A2 00A3 A3 00A4 A4 00A6 A6 00A7 A7 00A8 8D 00A9 A9 00AB AB 00AC AC 00AD AD 00AE AE 00AF 9D 00B0 B0 00B1 B1 00B2 B2 00B3 B3 00B4 B4 00B5 B5 00B6 B6 00B7 B7 00B8 8F 00B9 B9 00BB BB 00BC BC 00BD BD 00BE BE 00C4 C4 00C5 C5 00C6 AF 00C9 C9 00D3 D3 00D5 D5 00D6 D6 00D7 D7 00D8 A8 00DC DC 00DF DF 00E4 E4 00E5 E5 00E6 BF 00E9 E9 00F3 F3 00F5 F5 00F6 F6 00F7 F7 00F8 B8 00FC FC 0100 C2 0101 E2 0104 C0 0105 E0 0106 C3 0107 E3 010C C8 010D E8 0112 C7 0113 E7 0116 CB 0117 EB 0118 C6 0119 E6 0122 CC 0123 EC 012A CE 012B EE 012E C1 012F E1 0136 CD 0137 ED 013B CF 013C EF 0141 D9 0142 F9 0143 D1 0144 F1 0145 D2 0146 F2 014C D4 014D F4 0156 AA 0157 BA 015A DA 015B FA 0160 D0 0161 F0 016A DB 016B FB 0172 D8 0173 F8 0179 CA 017A EA 017B DD 017C FD 017D DE 017E FE 02C7 8E 02D9 FF 02DB 9E 2013 96 2014 97 2018 91 2019 92 201A 82 201C 93 201D 94 201E 84 2020 86 2021 87 2022 95 2026 85 2030 89 2039 8B 203A 9B 20AC 80 2122 99} +} -result {} + +test encoding-convertto-ucmCompare-cp1257 {Compare against ICU UCM} -body { + ucmConverttoMismatches cp1257 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0024 24 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007E 7E 007F 7F 00A0 A0 00A2 A2 00A3 A3 00A4 A4 00A6 A6 00A7 A7 00A8 8D 00A9 A9 00AB AB 00AC AC 00AD AD 00AE AE 00AF 9D 00B0 B0 00B1 B1 00B2 B2 00B3 B3 00B4 B4 00B5 B5 00B6 B6 00B7 B7 00B8 8F 00B9 B9 00BB BB 00BC BC 00BD BD 00BE BE 00C4 C4 00C5 C5 00C6 AF 00C9 C9 00D3 D3 00D5 D5 00D6 D6 00D7 D7 00D8 A8 00DC DC 00DF DF 00E4 E4 00E5 E5 00E6 BF 00E9 E9 00F3 F3 00F5 F5 00F6 F6 00F7 F7 00F8 B8 00FC FC 0100 C2 0101 E2 0104 C0 0105 E0 0106 C3 0107 E3 010C C8 010D E8 0112 C7 0113 E7 0116 CB 0117 EB 0118 C6 0119 E6 0122 CC 0123 EC 012A CE 012B EE 012E C1 012F E1 0136 CD 0137 ED 013B CF 013C EF 0141 D9 0142 F9 0143 D1 0144 F1 0145 D2 0146 F2 014C D4 014D F4 0156 AA 0157 BA 015A DA 015B FA 0160 D0 0161 F0 016A DB 016B FB 0172 D8 0173 F8 0179 CA 017A EA 017B DD 017C FD 017D DE 017E FE 02C7 8E 02D9 FF 02DB 9E 2013 96 2014 97 2018 91 2019 92 201A 82 201C 93 201D 94 201E 84 2020 86 2021 87 2022 95 2026 85 2030 89 2039 8B 203A 9B 20AC 80 2122 99} +} -result {} + +# cp1257 - invalid byte sequences +lappend encInvalidBytes {*}{ + cp1257 81 tcl8 \U00000081 -1 {} {} + cp1257 81 replace \uFFFD -1 {} {} + cp1257 81 strict {} 0 {} {} + cp1257 83 tcl8 \U00000083 -1 {} {} + cp1257 83 replace \uFFFD -1 {} {} + cp1257 83 strict {} 0 {} {} + cp1257 88 tcl8 \U00000088 -1 {} {} + cp1257 88 replace \uFFFD -1 {} {} + cp1257 88 strict {} 0 {} {} + cp1257 8A tcl8 \U0000008A -1 {} {} + cp1257 8A replace \uFFFD -1 {} {} + cp1257 8A strict {} 0 {} {} + cp1257 8C tcl8 \U0000008C -1 {} {} + cp1257 8C replace \uFFFD -1 {} {} + cp1257 8C strict {} 0 {} {} + cp1257 90 tcl8 \U00000090 -1 {} {} + cp1257 90 replace \uFFFD -1 {} {} + cp1257 90 strict {} 0 {} {} + cp1257 98 tcl8 \U00000098 -1 {} {} + cp1257 98 replace \uFFFD -1 {} {} + cp1257 98 strict {} 0 {} {} + cp1257 9A tcl8 \U0000009A -1 {} {} + cp1257 9A replace \uFFFD -1 {} {} + cp1257 9A strict {} 0 {} {} + cp1257 9C tcl8 \U0000009C -1 {} {} + cp1257 9C replace \uFFFD -1 {} {} + cp1257 9C strict {} 0 {} {} + cp1257 9F tcl8 \U0000009F -1 {} {} + cp1257 9F replace \uFFFD -1 {} {} + cp1257 9F strict {} 0 {} {} + cp1257 A1 tcl8 \U000000A1 -1 {} {} + cp1257 A1 replace \uFFFD -1 {} {} + cp1257 A1 strict {} 0 {} {} + cp1257 A5 tcl8 \U000000A5 -1 {} {} + cp1257 A5 replace \uFFFD -1 {} {} + cp1257 A5 strict {} 0 {} {} +}; # cp1257 + +# cp1257 - invalid byte sequences +lappend encUnencodableStrings {*}{ + cp1257 \U00000080 tcl8 1A -1 {} {} + cp1257 \U00000080 replace 1A -1 {} {} + cp1257 \U00000080 strict {} 0 {} {} + cp1257 \U00000400 tcl8 1A -1 {} {} + cp1257 \U00000400 replace 1A -1 {} {} + cp1257 \U00000400 strict {} 0 {} {} + cp1257 \U0000D800 tcl8 1A -1 {} {} + cp1257 \U0000D800 replace 1A -1 {} {} + cp1257 \U0000D800 strict {} 0 {} {} + cp1257 \U0000DC00 tcl8 1A -1 {} {} + cp1257 \U0000DC00 replace 1A -1 {} {} + cp1257 \U0000DC00 strict {} 0 {} {} + cp1257 \U00010000 tcl8 1A -1 {} {} + cp1257 \U00010000 replace 1A -1 {} {} + cp1257 \U00010000 strict {} 0 {} {} + cp1257 \U0010FFFF tcl8 1A -1 {} {} + cp1257 \U0010FFFF replace 1A -1 {} {} + cp1257 \U0010FFFF strict {} 0 {} {} +}; # cp1257 + +# +# cp1258 (generated from glibc-CP1258-2.1.2) + +test encoding-convertfrom-ucmCompare-cp1258 {Compare against ICU UCM} -body { + ucmConvertfromMismatches cp1258 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0024 24 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007E 7E 007F 7F 00A0 A0 00A1 A1 00A2 A2 00A3 A3 00A4 A4 00A5 A5 00A6 A6 00A7 A7 00A8 A8 00A9 A9 00AA AA 00AB AB 00AC AC 00AD AD 00AE AE 00AF AF 00B0 B0 00B1 B1 00B2 B2 00B3 B3 00B4 B4 00B5 B5 00B6 B6 00B7 B7 00B8 B8 00B9 B9 00BA BA 00BB BB 00BC BC 00BD BD 00BE BE 00BF BF 00C0 C0 00C1 C1 00C2 C2 00C4 C4 00C5 C5 00C6 C6 00C7 C7 00C8 C8 00C9 C9 00CA CA 00CB CB 00CD CD 00CE CE 00CF CF 00D1 D1 00D3 D3 00D4 D4 00D6 D6 00D7 D7 00D8 D8 00D9 D9 00DA DA 00DB DB 00DC DC 00DF DF 00E0 E0 00E1 E1 00E2 E2 00E4 E4 00E5 E5 00E6 E6 00E7 E7 00E8 E8 00E9 E9 00EA EA 00EB EB 00ED ED 00EE EE 00EF EF 00F1 F1 00F3 F3 00F4 F4 00F6 F6 00F7 F7 00F8 F8 00F9 F9 00FA FA 00FB FB 00FC FC 00FF FF 0102 C3 0103 E3 0110 D0 0111 F0 0152 8C 0153 9C 0178 9F 0192 83 01A0 D5 01A1 F5 01AF DD 01B0 FD 02C6 88 02DC 98 0300 CC 0303 DE 0309 D2 0323 F2 2013 96 2014 97 2018 91 2019 92 201A 82 201C 93 201D 94 201E 84 2020 86 2021 87 2022 95 2026 85 2030 89 2039 8B 203A 9B 20AB FE 20AC 80 2122 99} +} -result {} + +test encoding-convertto-ucmCompare-cp1258 {Compare against ICU UCM} -body { + ucmConverttoMismatches cp1258 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0024 24 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007E 7E 007F 7F 00A0 A0 00A1 A1 00A2 A2 00A3 A3 00A4 A4 00A5 A5 00A6 A6 00A7 A7 00A8 A8 00A9 A9 00AA AA 00AB AB 00AC AC 00AD AD 00AE AE 00AF AF 00B0 B0 00B1 B1 00B2 B2 00B3 B3 00B4 B4 00B5 B5 00B6 B6 00B7 B7 00B8 B8 00B9 B9 00BA BA 00BB BB 00BC BC 00BD BD 00BE BE 00BF BF 00C0 C0 00C1 C1 00C2 C2 00C4 C4 00C5 C5 00C6 C6 00C7 C7 00C8 C8 00C9 C9 00CA CA 00CB CB 00CD CD 00CE CE 00CF CF 00D1 D1 00D3 D3 00D4 D4 00D6 D6 00D7 D7 00D8 D8 00D9 D9 00DA DA 00DB DB 00DC DC 00DF DF 00E0 E0 00E1 E1 00E2 E2 00E4 E4 00E5 E5 00E6 E6 00E7 E7 00E8 E8 00E9 E9 00EA EA 00EB EB 00ED ED 00EE EE 00EF EF 00F1 F1 00F3 F3 00F4 F4 00F6 F6 00F7 F7 00F8 F8 00F9 F9 00FA FA 00FB FB 00FC FC 00FF FF 0102 C3 0103 E3 0110 D0 0111 F0 0152 8C 0153 9C 0178 9F 0192 83 01A0 D5 01A1 F5 01AF DD 01B0 FD 02C6 88 02DC 98 0300 CC 0303 DE 0309 D2 0323 F2 2013 96 2014 97 2018 91 2019 92 201A 82 201C 93 201D 94 201E 84 2020 86 2021 87 2022 95 2026 85 2030 89 2039 8B 203A 9B 20AB FE 20AC 80 2122 99} +} -result {} + +# cp1258 - invalid byte sequences +lappend encInvalidBytes {*}{ + cp1258 81 tcl8 \U00000081 -1 {} {} + cp1258 81 replace \uFFFD -1 {} {} + cp1258 81 strict {} 0 {} {} + cp1258 8A tcl8 \U0000008A -1 {} {} + cp1258 8A replace \uFFFD -1 {} {} + cp1258 8A strict {} 0 {} {} + cp1258 8D tcl8 \U0000008D -1 {} {} + cp1258 8D replace \uFFFD -1 {} {} + cp1258 8D strict {} 0 {} {} + cp1258 8E tcl8 \U0000008E -1 {} {} + cp1258 8E replace \uFFFD -1 {} {} + cp1258 8E strict {} 0 {} {} + cp1258 8F tcl8 \U0000008F -1 {} {} + cp1258 8F replace \uFFFD -1 {} {} + cp1258 8F strict {} 0 {} {} + cp1258 90 tcl8 \U00000090 -1 {} {} + cp1258 90 replace \uFFFD -1 {} {} + cp1258 90 strict {} 0 {} {} + cp1258 9A tcl8 \U0000009A -1 {} {} + cp1258 9A replace \uFFFD -1 {} {} + cp1258 9A strict {} 0 {} {} + cp1258 9D tcl8 \U0000009D -1 {} {} + cp1258 9D replace \uFFFD -1 {} {} + cp1258 9D strict {} 0 {} {} + cp1258 9E tcl8 \U0000009E -1 {} {} + cp1258 9E replace \uFFFD -1 {} {} + cp1258 9E strict {} 0 {} {} + cp1258 EC tcl8 \U000000EC -1 {} {} + cp1258 EC replace \uFFFD -1 {} {} + cp1258 EC strict {} 0 {} {} +}; # cp1258 + +# cp1258 - invalid byte sequences +lappend encUnencodableStrings {*}{ + cp1258 \U00000080 tcl8 1A -1 {} {} + cp1258 \U00000080 replace 1A -1 {} {} + cp1258 \U00000080 strict {} 0 {} {} + cp1258 \U00000400 tcl8 1A -1 {} {} + cp1258 \U00000400 replace 1A -1 {} {} + cp1258 \U00000400 strict {} 0 {} {} + cp1258 \U0000D800 tcl8 1A -1 {} {} + cp1258 \U0000D800 replace 1A -1 {} {} + cp1258 \U0000D800 strict {} 0 {} {} + cp1258 \U0000DC00 tcl8 1A -1 {} {} + cp1258 \U0000DC00 replace 1A -1 {} {} + cp1258 \U0000DC00 strict {} 0 {} {} + cp1258 \U00010000 tcl8 1A -1 {} {} + cp1258 \U00010000 replace 1A -1 {} {} + cp1258 \U00010000 strict {} 0 {} {} + cp1258 \U0010FFFF tcl8 1A -1 {} {} + cp1258 \U0010FFFF replace 1A -1 {} {} + cp1258 \U0010FFFF strict {} 0 {} {} +}; # cp1258 + +# +# gb1988 (generated from glibc-GB_1988_80-2.3.3) + +test encoding-convertfrom-ucmCompare-gb1988 {Compare against ICU UCM} -body { + ucmConvertfromMismatches gb1988 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007F 7F 00A5 24 203E 7E} +} -result {} + +test encoding-convertto-ucmCompare-gb1988 {Compare against ICU UCM} -body { + ucmConverttoMismatches gb1988 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007F 7F 00A5 24 203E 7E} +} -result {} + +# gb1988 - invalid byte sequences +lappend encInvalidBytes {*}{ + gb1988 80 tcl8 \U00000080 -1 {} {} + gb1988 80 replace \uFFFD -1 {} {} + gb1988 80 strict {} 0 {} {} + gb1988 81 tcl8 \U00000081 -1 {} {} + gb1988 81 replace \uFFFD -1 {} {} + gb1988 81 strict {} 0 {} {} + gb1988 82 tcl8 \U00000082 -1 {} {} + gb1988 82 replace \uFFFD -1 {} {} + gb1988 82 strict {} 0 {} {} + gb1988 83 tcl8 \U00000083 -1 {} {} + gb1988 83 replace \uFFFD -1 {} {} + gb1988 83 strict {} 0 {} {} + gb1988 84 tcl8 \U00000084 -1 {} {} + gb1988 84 replace \uFFFD -1 {} {} + gb1988 84 strict {} 0 {} {} + gb1988 85 tcl8 \U00000085 -1 {} {} + gb1988 85 replace \uFFFD -1 {} {} + gb1988 85 strict {} 0 {} {} + gb1988 86 tcl8 \U00000086 -1 {} {} + gb1988 86 replace \uFFFD -1 {} {} + gb1988 86 strict {} 0 {} {} + gb1988 87 tcl8 \U00000087 -1 {} {} + gb1988 87 replace \uFFFD -1 {} {} + gb1988 87 strict {} 0 {} {} + gb1988 88 tcl8 \U00000088 -1 {} {} + gb1988 88 replace \uFFFD -1 {} {} + gb1988 88 strict {} 0 {} {} + gb1988 89 tcl8 \U00000089 -1 {} {} + gb1988 89 replace \uFFFD -1 {} {} + gb1988 89 strict {} 0 {} {} + gb1988 8A tcl8 \U0000008A -1 {} {} + gb1988 8A replace \uFFFD -1 {} {} + gb1988 8A strict {} 0 {} {} + gb1988 8B tcl8 \U0000008B -1 {} {} + gb1988 8B replace \uFFFD -1 {} {} + gb1988 8B strict {} 0 {} {} + gb1988 8C tcl8 \U0000008C -1 {} {} + gb1988 8C replace \uFFFD -1 {} {} + gb1988 8C strict {} 0 {} {} + gb1988 8D tcl8 \U0000008D -1 {} {} + gb1988 8D replace \uFFFD -1 {} {} + gb1988 8D strict {} 0 {} {} + gb1988 8E tcl8 \U0000008E -1 {} {} + gb1988 8E replace \uFFFD -1 {} {} + gb1988 8E strict {} 0 {} {} + gb1988 8F tcl8 \U0000008F -1 {} {} + gb1988 8F replace \uFFFD -1 {} {} + gb1988 8F strict {} 0 {} {} + gb1988 90 tcl8 \U00000090 -1 {} {} + gb1988 90 replace \uFFFD -1 {} {} + gb1988 90 strict {} 0 {} {} + gb1988 91 tcl8 \U00000091 -1 {} {} + gb1988 91 replace \uFFFD -1 {} {} + gb1988 91 strict {} 0 {} {} + gb1988 92 tcl8 \U00000092 -1 {} {} + gb1988 92 replace \uFFFD -1 {} {} + gb1988 92 strict {} 0 {} {} + gb1988 93 tcl8 \U00000093 -1 {} {} + gb1988 93 replace \uFFFD -1 {} {} + gb1988 93 strict {} 0 {} {} + gb1988 94 tcl8 \U00000094 -1 {} {} + gb1988 94 replace \uFFFD -1 {} {} + gb1988 94 strict {} 0 {} {} + gb1988 95 tcl8 \U00000095 -1 {} {} + gb1988 95 replace \uFFFD -1 {} {} + gb1988 95 strict {} 0 {} {} + gb1988 96 tcl8 \U00000096 -1 {} {} + gb1988 96 replace \uFFFD -1 {} {} + gb1988 96 strict {} 0 {} {} + gb1988 97 tcl8 \U00000097 -1 {} {} + gb1988 97 replace \uFFFD -1 {} {} + gb1988 97 strict {} 0 {} {} + gb1988 98 tcl8 \U00000098 -1 {} {} + gb1988 98 replace \uFFFD -1 {} {} + gb1988 98 strict {} 0 {} {} + gb1988 99 tcl8 \U00000099 -1 {} {} + gb1988 99 replace \uFFFD -1 {} {} + gb1988 99 strict {} 0 {} {} + gb1988 9A tcl8 \U0000009A -1 {} {} + gb1988 9A replace \uFFFD -1 {} {} + gb1988 9A strict {} 0 {} {} + gb1988 9B tcl8 \U0000009B -1 {} {} + gb1988 9B replace \uFFFD -1 {} {} + gb1988 9B strict {} 0 {} {} + gb1988 9C tcl8 \U0000009C -1 {} {} + gb1988 9C replace \uFFFD -1 {} {} + gb1988 9C strict {} 0 {} {} + gb1988 9D tcl8 \U0000009D -1 {} {} + gb1988 9D replace \uFFFD -1 {} {} + gb1988 9D strict {} 0 {} {} + gb1988 9E tcl8 \U0000009E -1 {} {} + gb1988 9E replace \uFFFD -1 {} {} + gb1988 9E strict {} 0 {} {} + gb1988 9F tcl8 \U0000009F -1 {} {} + gb1988 9F replace \uFFFD -1 {} {} + gb1988 9F strict {} 0 {} {} + gb1988 A0 tcl8 \U000000A0 -1 {} {} + gb1988 A0 replace \uFFFD -1 {} {} + gb1988 A0 strict {} 0 {} {} + gb1988 A1 tcl8 \U000000A1 -1 {} {} + gb1988 A1 replace \uFFFD -1 {} {} + gb1988 A1 strict {} 0 {} {} + gb1988 A2 tcl8 \U000000A2 -1 {} {} + gb1988 A2 replace \uFFFD -1 {} {} + gb1988 A2 strict {} 0 {} {} + gb1988 A3 tcl8 \U000000A3 -1 {} {} + gb1988 A3 replace \uFFFD -1 {} {} + gb1988 A3 strict {} 0 {} {} + gb1988 A4 tcl8 \U000000A4 -1 {} {} + gb1988 A4 replace \uFFFD -1 {} {} + gb1988 A4 strict {} 0 {} {} + gb1988 A5 tcl8 \U000000A5 -1 {} {} + gb1988 A5 replace \uFFFD -1 {} {} + gb1988 A5 strict {} 0 {} {} + gb1988 A6 tcl8 \U000000A6 -1 {} {} + gb1988 A6 replace \uFFFD -1 {} {} + gb1988 A6 strict {} 0 {} {} + gb1988 A7 tcl8 \U000000A7 -1 {} {} + gb1988 A7 replace \uFFFD -1 {} {} + gb1988 A7 strict {} 0 {} {} + gb1988 A8 tcl8 \U000000A8 -1 {} {} + gb1988 A8 replace \uFFFD -1 {} {} + gb1988 A8 strict {} 0 {} {} + gb1988 A9 tcl8 \U000000A9 -1 {} {} + gb1988 A9 replace \uFFFD -1 {} {} + gb1988 A9 strict {} 0 {} {} + gb1988 AA tcl8 \U000000AA -1 {} {} + gb1988 AA replace \uFFFD -1 {} {} + gb1988 AA strict {} 0 {} {} + gb1988 AB tcl8 \U000000AB -1 {} {} + gb1988 AB replace \uFFFD -1 {} {} + gb1988 AB strict {} 0 {} {} + gb1988 AC tcl8 \U000000AC -1 {} {} + gb1988 AC replace \uFFFD -1 {} {} + gb1988 AC strict {} 0 {} {} + gb1988 AD tcl8 \U000000AD -1 {} {} + gb1988 AD replace \uFFFD -1 {} {} + gb1988 AD strict {} 0 {} {} + gb1988 AE tcl8 \U000000AE -1 {} {} + gb1988 AE replace \uFFFD -1 {} {} + gb1988 AE strict {} 0 {} {} + gb1988 AF tcl8 \U000000AF -1 {} {} + gb1988 AF replace \uFFFD -1 {} {} + gb1988 AF strict {} 0 {} {} + gb1988 B0 tcl8 \U000000B0 -1 {} {} + gb1988 B0 replace \uFFFD -1 {} {} + gb1988 B0 strict {} 0 {} {} + gb1988 B1 tcl8 \U000000B1 -1 {} {} + gb1988 B1 replace \uFFFD -1 {} {} + gb1988 B1 strict {} 0 {} {} + gb1988 B2 tcl8 \U000000B2 -1 {} {} + gb1988 B2 replace \uFFFD -1 {} {} + gb1988 B2 strict {} 0 {} {} + gb1988 B3 tcl8 \U000000B3 -1 {} {} + gb1988 B3 replace \uFFFD -1 {} {} + gb1988 B3 strict {} 0 {} {} + gb1988 B4 tcl8 \U000000B4 -1 {} {} + gb1988 B4 replace \uFFFD -1 {} {} + gb1988 B4 strict {} 0 {} {} + gb1988 B5 tcl8 \U000000B5 -1 {} {} + gb1988 B5 replace \uFFFD -1 {} {} + gb1988 B5 strict {} 0 {} {} + gb1988 B6 tcl8 \U000000B6 -1 {} {} + gb1988 B6 replace \uFFFD -1 {} {} + gb1988 B6 strict {} 0 {} {} + gb1988 B7 tcl8 \U000000B7 -1 {} {} + gb1988 B7 replace \uFFFD -1 {} {} + gb1988 B7 strict {} 0 {} {} + gb1988 B8 tcl8 \U000000B8 -1 {} {} + gb1988 B8 replace \uFFFD -1 {} {} + gb1988 B8 strict {} 0 {} {} + gb1988 B9 tcl8 \U000000B9 -1 {} {} + gb1988 B9 replace \uFFFD -1 {} {} + gb1988 B9 strict {} 0 {} {} + gb1988 BA tcl8 \U000000BA -1 {} {} + gb1988 BA replace \uFFFD -1 {} {} + gb1988 BA strict {} 0 {} {} + gb1988 BB tcl8 \U000000BB -1 {} {} + gb1988 BB replace \uFFFD -1 {} {} + gb1988 BB strict {} 0 {} {} + gb1988 BC tcl8 \U000000BC -1 {} {} + gb1988 BC replace \uFFFD -1 {} {} + gb1988 BC strict {} 0 {} {} + gb1988 BD tcl8 \U000000BD -1 {} {} + gb1988 BD replace \uFFFD -1 {} {} + gb1988 BD strict {} 0 {} {} + gb1988 BE tcl8 \U000000BE -1 {} {} + gb1988 BE replace \uFFFD -1 {} {} + gb1988 BE strict {} 0 {} {} + gb1988 BF tcl8 \U000000BF -1 {} {} + gb1988 BF replace \uFFFD -1 {} {} + gb1988 BF strict {} 0 {} {} + gb1988 C0 tcl8 \U000000C0 -1 {} {} + gb1988 C0 replace \uFFFD -1 {} {} + gb1988 C0 strict {} 0 {} {} + gb1988 C1 tcl8 \U000000C1 -1 {} {} + gb1988 C1 replace \uFFFD -1 {} {} + gb1988 C1 strict {} 0 {} {} + gb1988 C2 tcl8 \U000000C2 -1 {} {} + gb1988 C2 replace \uFFFD -1 {} {} + gb1988 C2 strict {} 0 {} {} + gb1988 C3 tcl8 \U000000C3 -1 {} {} + gb1988 C3 replace \uFFFD -1 {} {} + gb1988 C3 strict {} 0 {} {} + gb1988 C4 tcl8 \U000000C4 -1 {} {} + gb1988 C4 replace \uFFFD -1 {} {} + gb1988 C4 strict {} 0 {} {} + gb1988 C5 tcl8 \U000000C5 -1 {} {} + gb1988 C5 replace \uFFFD -1 {} {} + gb1988 C5 strict {} 0 {} {} + gb1988 C6 tcl8 \U000000C6 -1 {} {} + gb1988 C6 replace \uFFFD -1 {} {} + gb1988 C6 strict {} 0 {} {} + gb1988 C7 tcl8 \U000000C7 -1 {} {} + gb1988 C7 replace \uFFFD -1 {} {} + gb1988 C7 strict {} 0 {} {} + gb1988 C8 tcl8 \U000000C8 -1 {} {} + gb1988 C8 replace \uFFFD -1 {} {} + gb1988 C8 strict {} 0 {} {} + gb1988 C9 tcl8 \U000000C9 -1 {} {} + gb1988 C9 replace \uFFFD -1 {} {} + gb1988 C9 strict {} 0 {} {} + gb1988 CA tcl8 \U000000CA -1 {} {} + gb1988 CA replace \uFFFD -1 {} {} + gb1988 CA strict {} 0 {} {} + gb1988 CB tcl8 \U000000CB -1 {} {} + gb1988 CB replace \uFFFD -1 {} {} + gb1988 CB strict {} 0 {} {} + gb1988 CC tcl8 \U000000CC -1 {} {} + gb1988 CC replace \uFFFD -1 {} {} + gb1988 CC strict {} 0 {} {} + gb1988 CD tcl8 \U000000CD -1 {} {} + gb1988 CD replace \uFFFD -1 {} {} + gb1988 CD strict {} 0 {} {} + gb1988 CE tcl8 \U000000CE -1 {} {} + gb1988 CE replace \uFFFD -1 {} {} + gb1988 CE strict {} 0 {} {} + gb1988 CF tcl8 \U000000CF -1 {} {} + gb1988 CF replace \uFFFD -1 {} {} + gb1988 CF strict {} 0 {} {} + gb1988 D0 tcl8 \U000000D0 -1 {} {} + gb1988 D0 replace \uFFFD -1 {} {} + gb1988 D0 strict {} 0 {} {} + gb1988 D1 tcl8 \U000000D1 -1 {} {} + gb1988 D1 replace \uFFFD -1 {} {} + gb1988 D1 strict {} 0 {} {} + gb1988 D2 tcl8 \U000000D2 -1 {} {} + gb1988 D2 replace \uFFFD -1 {} {} + gb1988 D2 strict {} 0 {} {} + gb1988 D3 tcl8 \U000000D3 -1 {} {} + gb1988 D3 replace \uFFFD -1 {} {} + gb1988 D3 strict {} 0 {} {} + gb1988 D4 tcl8 \U000000D4 -1 {} {} + gb1988 D4 replace \uFFFD -1 {} {} + gb1988 D4 strict {} 0 {} {} + gb1988 D5 tcl8 \U000000D5 -1 {} {} + gb1988 D5 replace \uFFFD -1 {} {} + gb1988 D5 strict {} 0 {} {} + gb1988 D6 tcl8 \U000000D6 -1 {} {} + gb1988 D6 replace \uFFFD -1 {} {} + gb1988 D6 strict {} 0 {} {} + gb1988 D7 tcl8 \U000000D7 -1 {} {} + gb1988 D7 replace \uFFFD -1 {} {} + gb1988 D7 strict {} 0 {} {} + gb1988 D8 tcl8 \U000000D8 -1 {} {} + gb1988 D8 replace \uFFFD -1 {} {} + gb1988 D8 strict {} 0 {} {} + gb1988 D9 tcl8 \U000000D9 -1 {} {} + gb1988 D9 replace \uFFFD -1 {} {} + gb1988 D9 strict {} 0 {} {} + gb1988 DA tcl8 \U000000DA -1 {} {} + gb1988 DA replace \uFFFD -1 {} {} + gb1988 DA strict {} 0 {} {} + gb1988 DB tcl8 \U000000DB -1 {} {} + gb1988 DB replace \uFFFD -1 {} {} + gb1988 DB strict {} 0 {} {} + gb1988 DC tcl8 \U000000DC -1 {} {} + gb1988 DC replace \uFFFD -1 {} {} + gb1988 DC strict {} 0 {} {} + gb1988 DD tcl8 \U000000DD -1 {} {} + gb1988 DD replace \uFFFD -1 {} {} + gb1988 DD strict {} 0 {} {} + gb1988 DE tcl8 \U000000DE -1 {} {} + gb1988 DE replace \uFFFD -1 {} {} + gb1988 DE strict {} 0 {} {} + gb1988 DF tcl8 \U000000DF -1 {} {} + gb1988 DF replace \uFFFD -1 {} {} + gb1988 DF strict {} 0 {} {} + gb1988 E0 tcl8 \U000000E0 -1 {} {} + gb1988 E0 replace \uFFFD -1 {} {} + gb1988 E0 strict {} 0 {} {} + gb1988 E1 tcl8 \U000000E1 -1 {} {} + gb1988 E1 replace \uFFFD -1 {} {} + gb1988 E1 strict {} 0 {} {} + gb1988 E2 tcl8 \U000000E2 -1 {} {} + gb1988 E2 replace \uFFFD -1 {} {} + gb1988 E2 strict {} 0 {} {} + gb1988 E3 tcl8 \U000000E3 -1 {} {} + gb1988 E3 replace \uFFFD -1 {} {} + gb1988 E3 strict {} 0 {} {} + gb1988 E4 tcl8 \U000000E4 -1 {} {} + gb1988 E4 replace \uFFFD -1 {} {} + gb1988 E4 strict {} 0 {} {} + gb1988 E5 tcl8 \U000000E5 -1 {} {} + gb1988 E5 replace \uFFFD -1 {} {} + gb1988 E5 strict {} 0 {} {} + gb1988 E6 tcl8 \U000000E6 -1 {} {} + gb1988 E6 replace \uFFFD -1 {} {} + gb1988 E6 strict {} 0 {} {} + gb1988 E7 tcl8 \U000000E7 -1 {} {} + gb1988 E7 replace \uFFFD -1 {} {} + gb1988 E7 strict {} 0 {} {} + gb1988 E8 tcl8 \U000000E8 -1 {} {} + gb1988 E8 replace \uFFFD -1 {} {} + gb1988 E8 strict {} 0 {} {} + gb1988 E9 tcl8 \U000000E9 -1 {} {} + gb1988 E9 replace \uFFFD -1 {} {} + gb1988 E9 strict {} 0 {} {} + gb1988 EA tcl8 \U000000EA -1 {} {} + gb1988 EA replace \uFFFD -1 {} {} + gb1988 EA strict {} 0 {} {} + gb1988 EB tcl8 \U000000EB -1 {} {} + gb1988 EB replace \uFFFD -1 {} {} + gb1988 EB strict {} 0 {} {} + gb1988 EC tcl8 \U000000EC -1 {} {} + gb1988 EC replace \uFFFD -1 {} {} + gb1988 EC strict {} 0 {} {} + gb1988 ED tcl8 \U000000ED -1 {} {} + gb1988 ED replace \uFFFD -1 {} {} + gb1988 ED strict {} 0 {} {} + gb1988 EE tcl8 \U000000EE -1 {} {} + gb1988 EE replace \uFFFD -1 {} {} + gb1988 EE strict {} 0 {} {} + gb1988 EF tcl8 \U000000EF -1 {} {} + gb1988 EF replace \uFFFD -1 {} {} + gb1988 EF strict {} 0 {} {} + gb1988 F0 tcl8 \U000000F0 -1 {} {} + gb1988 F0 replace \uFFFD -1 {} {} + gb1988 F0 strict {} 0 {} {} + gb1988 F1 tcl8 \U000000F1 -1 {} {} + gb1988 F1 replace \uFFFD -1 {} {} + gb1988 F1 strict {} 0 {} {} + gb1988 F2 tcl8 \U000000F2 -1 {} {} + gb1988 F2 replace \uFFFD -1 {} {} + gb1988 F2 strict {} 0 {} {} + gb1988 F3 tcl8 \U000000F3 -1 {} {} + gb1988 F3 replace \uFFFD -1 {} {} + gb1988 F3 strict {} 0 {} {} + gb1988 F4 tcl8 \U000000F4 -1 {} {} + gb1988 F4 replace \uFFFD -1 {} {} + gb1988 F4 strict {} 0 {} {} + gb1988 F5 tcl8 \U000000F5 -1 {} {} + gb1988 F5 replace \uFFFD -1 {} {} + gb1988 F5 strict {} 0 {} {} + gb1988 F6 tcl8 \U000000F6 -1 {} {} + gb1988 F6 replace \uFFFD -1 {} {} + gb1988 F6 strict {} 0 {} {} + gb1988 F7 tcl8 \U000000F7 -1 {} {} + gb1988 F7 replace \uFFFD -1 {} {} + gb1988 F7 strict {} 0 {} {} + gb1988 F8 tcl8 \U000000F8 -1 {} {} + gb1988 F8 replace \uFFFD -1 {} {} + gb1988 F8 strict {} 0 {} {} + gb1988 F9 tcl8 \U000000F9 -1 {} {} + gb1988 F9 replace \uFFFD -1 {} {} + gb1988 F9 strict {} 0 {} {} + gb1988 FA tcl8 \U000000FA -1 {} {} + gb1988 FA replace \uFFFD -1 {} {} + gb1988 FA strict {} 0 {} {} + gb1988 FB tcl8 \U000000FB -1 {} {} + gb1988 FB replace \uFFFD -1 {} {} + gb1988 FB strict {} 0 {} {} + gb1988 FC tcl8 \U000000FC -1 {} {} + gb1988 FC replace \uFFFD -1 {} {} + gb1988 FC strict {} 0 {} {} + gb1988 FD tcl8 \U000000FD -1 {} {} + gb1988 FD replace \uFFFD -1 {} {} + gb1988 FD strict {} 0 {} {} + gb1988 FE tcl8 \U000000FE -1 {} {} + gb1988 FE replace \uFFFD -1 {} {} + gb1988 FE strict {} 0 {} {} + gb1988 FF tcl8 \U000000FF -1 {} {} + gb1988 FF replace \uFFFD -1 {} {} + gb1988 FF strict {} 0 {} {} +}; # gb1988 + +# gb1988 - invalid byte sequences +lappend encUnencodableStrings {*}{ + gb1988 \U00000024 tcl8 1A -1 {} {} + gb1988 \U00000024 replace 1A -1 {} {} + gb1988 \U00000024 strict {} 0 {} {} + gb1988 \U00000400 tcl8 1A -1 {} {} + gb1988 \U00000400 replace 1A -1 {} {} + gb1988 \U00000400 strict {} 0 {} {} + gb1988 \U0000D800 tcl8 1A -1 {} {} + gb1988 \U0000D800 replace 1A -1 {} {} + gb1988 \U0000D800 strict {} 0 {} {} + gb1988 \U0000DC00 tcl8 1A -1 {} {} + gb1988 \U0000DC00 replace 1A -1 {} {} + gb1988 \U0000DC00 strict {} 0 {} {} + gb1988 \U00010000 tcl8 1A -1 {} {} + gb1988 \U00010000 replace 1A -1 {} {} + gb1988 \U00010000 strict {} 0 {} {} + gb1988 \U0010FFFF tcl8 1A -1 {} {} + gb1988 \U0010FFFF replace 1A -1 {} {} + gb1988 \U0010FFFF strict {} 0 {} {} +}; # gb1988 + +# +# iso8859-1 (generated from glibc-ISO_8859_1-2.1.2) + +test encoding-convertfrom-ucmCompare-iso8859-1 {Compare against ICU UCM} -body { + ucmConvertfromMismatches iso8859-1 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0024 24 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007E 7E 007F 7F 0080 80 0081 81 0082 82 0083 83 0084 84 0085 85 0086 86 0087 87 0088 88 0089 89 008A 8A 008B 8B 008C 8C 008D 8D 008E 8E 008F 8F 0090 90 0091 91 0092 92 0093 93 0094 94 0095 95 0096 96 0097 97 0098 98 0099 99 009A 9A 009B 9B 009C 9C 009D 9D 009E 9E 009F 9F 00A0 A0 00A1 A1 00A2 A2 00A3 A3 00A4 A4 00A5 A5 00A6 A6 00A7 A7 00A8 A8 00A9 A9 00AA AA 00AB AB 00AC AC 00AD AD 00AE AE 00AF AF 00B0 B0 00B1 B1 00B2 B2 00B3 B3 00B4 B4 00B5 B5 00B6 B6 00B7 B7 00B8 B8 00B9 B9 00BA BA 00BB BB 00BC BC 00BD BD 00BE BE 00BF BF 00C0 C0 00C1 C1 00C2 C2 00C3 C3 00C4 C4 00C5 C5 00C6 C6 00C7 C7 00C8 C8 00C9 C9 00CA CA 00CB CB 00CC CC 00CD CD 00CE CE 00CF CF 00D0 D0 00D1 D1 00D2 D2 00D3 D3 00D4 D4 00D5 D5 00D6 D6 00D7 D7 00D8 D8 00D9 D9 00DA DA 00DB DB 00DC DC 00DD DD 00DE DE 00DF DF 00E0 E0 00E1 E1 00E2 E2 00E3 E3 00E4 E4 00E5 E5 00E6 E6 00E7 E7 00E8 E8 00E9 E9 00EA EA 00EB EB 00EC EC 00ED ED 00EE EE 00EF EF 00F0 F0 00F1 F1 00F2 F2 00F3 F3 00F4 F4 00F5 F5 00F6 F6 00F7 F7 00F8 F8 00F9 F9 00FA FA 00FB FB 00FC FC 00FD FD 00FE FE 00FF FF} +} -result {} + +test encoding-convertto-ucmCompare-iso8859-1 {Compare against ICU UCM} -body { + ucmConverttoMismatches iso8859-1 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0024 24 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007E 7E 007F 7F 0080 80 0081 81 0082 82 0083 83 0084 84 0085 85 0086 86 0087 87 0088 88 0089 89 008A 8A 008B 8B 008C 8C 008D 8D 008E 8E 008F 8F 0090 90 0091 91 0092 92 0093 93 0094 94 0095 95 0096 96 0097 97 0098 98 0099 99 009A 9A 009B 9B 009C 9C 009D 9D 009E 9E 009F 9F 00A0 A0 00A1 A1 00A2 A2 00A3 A3 00A4 A4 00A5 A5 00A6 A6 00A7 A7 00A8 A8 00A9 A9 00AA AA 00AB AB 00AC AC 00AD AD 00AE AE 00AF AF 00B0 B0 00B1 B1 00B2 B2 00B3 B3 00B4 B4 00B5 B5 00B6 B6 00B7 B7 00B8 B8 00B9 B9 00BA BA 00BB BB 00BC BC 00BD BD 00BE BE 00BF BF 00C0 C0 00C1 C1 00C2 C2 00C3 C3 00C4 C4 00C5 C5 00C6 C6 00C7 C7 00C8 C8 00C9 C9 00CA CA 00CB CB 00CC CC 00CD CD 00CE CE 00CF CF 00D0 D0 00D1 D1 00D2 D2 00D3 D3 00D4 D4 00D5 D5 00D6 D6 00D7 D7 00D8 D8 00D9 D9 00DA DA 00DB DB 00DC DC 00DD DD 00DE DE 00DF DF 00E0 E0 00E1 E1 00E2 E2 00E3 E3 00E4 E4 00E5 E5 00E6 E6 00E7 E7 00E8 E8 00E9 E9 00EA EA 00EB EB 00EC EC 00ED ED 00EE EE 00EF EF 00F0 F0 00F1 F1 00F2 F2 00F3 F3 00F4 F4 00F5 F5 00F6 F6 00F7 F7 00F8 F8 00F9 F9 00FA FA 00FB FB 00FC FC 00FD FD 00FE FE 00FF FF} +} -result {} + +# iso8859-1 - invalid byte sequences +lappend encInvalidBytes {*}{ +}; # iso8859-1 + +# iso8859-1 - invalid byte sequences +lappend encUnencodableStrings {*}{ + iso8859-1 \U00000400 tcl8 1A -1 {} {} + iso8859-1 \U00000400 replace 1A -1 {} {} + iso8859-1 \U00000400 strict {} 0 {} {} + iso8859-1 \U0000D800 tcl8 1A -1 {} {} + iso8859-1 \U0000D800 replace 1A -1 {} {} + iso8859-1 \U0000D800 strict {} 0 {} {} + iso8859-1 \U0000DC00 tcl8 1A -1 {} {} + iso8859-1 \U0000DC00 replace 1A -1 {} {} + iso8859-1 \U0000DC00 strict {} 0 {} {} + iso8859-1 \U00010000 tcl8 1A -1 {} {} + iso8859-1 \U00010000 replace 1A -1 {} {} + iso8859-1 \U00010000 strict {} 0 {} {} + iso8859-1 \U0010FFFF tcl8 1A -1 {} {} + iso8859-1 \U0010FFFF replace 1A -1 {} {} + iso8859-1 \U0010FFFF strict {} 0 {} {} +}; # iso8859-1 + +# +# iso8859-2 (generated from glibc-ISO_8859_2-2.1.2) + +test encoding-convertfrom-ucmCompare-iso8859-2 {Compare against ICU UCM} -body { + ucmConvertfromMismatches iso8859-2 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0024 24 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007E 7E 007F 7F 0080 80 0081 81 0082 82 0083 83 0084 84 0085 85 0086 86 0087 87 0088 88 0089 89 008A 8A 008B 8B 008C 8C 008D 8D 008E 8E 008F 8F 0090 90 0091 91 0092 92 0093 93 0094 94 0095 95 0096 96 0097 97 0098 98 0099 99 009A 9A 009B 9B 009C 9C 009D 9D 009E 9E 009F 9F 00A0 A0 00A4 A4 00A7 A7 00A8 A8 00AD AD 00B0 B0 00B4 B4 00B8 B8 00C1 C1 00C2 C2 00C4 C4 00C7 C7 00C9 C9 00CB CB 00CD CD 00CE CE 00D3 D3 00D4 D4 00D6 D6 00D7 D7 00DA DA 00DC DC 00DD DD 00DF DF 00E1 E1 00E2 E2 00E4 E4 00E7 E7 00E9 E9 00EB EB 00ED ED 00EE EE 00F3 F3 00F4 F4 00F6 F6 00F7 F7 00FA FA 00FC FC 00FD FD 0102 C3 0103 E3 0104 A1 0105 B1 0106 C6 0107 E6 010C C8 010D E8 010E CF 010F EF 0110 D0 0111 F0 0118 CA 0119 EA 011A CC 011B EC 0139 C5 013A E5 013D A5 013E B5 0141 A3 0142 B3 0143 D1 0144 F1 0147 D2 0148 F2 0150 D5 0151 F5 0154 C0 0155 E0 0158 D8 0159 F8 015A A6 015B B6 015E AA 015F BA 0160 A9 0161 B9 0162 DE 0163 FE 0164 AB 0165 BB 016E D9 016F F9 0170 DB 0171 FB 0179 AC 017A BC 017B AF 017C BF 017D AE 017E BE 02C7 B7 02D8 A2 02D9 FF 02DB B2 02DD BD} +} -result {} + +test encoding-convertto-ucmCompare-iso8859-2 {Compare against ICU UCM} -body { + ucmConverttoMismatches iso8859-2 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0024 24 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007E 7E 007F 7F 0080 80 0081 81 0082 82 0083 83 0084 84 0085 85 0086 86 0087 87 0088 88 0089 89 008A 8A 008B 8B 008C 8C 008D 8D 008E 8E 008F 8F 0090 90 0091 91 0092 92 0093 93 0094 94 0095 95 0096 96 0097 97 0098 98 0099 99 009A 9A 009B 9B 009C 9C 009D 9D 009E 9E 009F 9F 00A0 A0 00A4 A4 00A7 A7 00A8 A8 00AD AD 00B0 B0 00B4 B4 00B8 B8 00C1 C1 00C2 C2 00C4 C4 00C7 C7 00C9 C9 00CB CB 00CD CD 00CE CE 00D3 D3 00D4 D4 00D6 D6 00D7 D7 00DA DA 00DC DC 00DD DD 00DF DF 00E1 E1 00E2 E2 00E4 E4 00E7 E7 00E9 E9 00EB EB 00ED ED 00EE EE 00F3 F3 00F4 F4 00F6 F6 00F7 F7 00FA FA 00FC FC 00FD FD 0102 C3 0103 E3 0104 A1 0105 B1 0106 C6 0107 E6 010C C8 010D E8 010E CF 010F EF 0110 D0 0111 F0 0118 CA 0119 EA 011A CC 011B EC 0139 C5 013A E5 013D A5 013E B5 0141 A3 0142 B3 0143 D1 0144 F1 0147 D2 0148 F2 0150 D5 0151 F5 0154 C0 0155 E0 0158 D8 0159 F8 015A A6 015B B6 015E AA 015F BA 0160 A9 0161 B9 0162 DE 0163 FE 0164 AB 0165 BB 016E D9 016F F9 0170 DB 0171 FB 0179 AC 017A BC 017B AF 017C BF 017D AE 017E BE 02C7 B7 02D8 A2 02D9 FF 02DB B2 02DD BD} +} -result {} + +# iso8859-2 - invalid byte sequences +lappend encInvalidBytes {*}{ +}; # iso8859-2 + +# iso8859-2 - invalid byte sequences +lappend encUnencodableStrings {*}{ + iso8859-2 \U000000A1 tcl8 1A -1 {} {} + iso8859-2 \U000000A1 replace 1A -1 {} {} + iso8859-2 \U000000A1 strict {} 0 {} {} + iso8859-2 \U00000400 tcl8 1A -1 {} {} + iso8859-2 \U00000400 replace 1A -1 {} {} + iso8859-2 \U00000400 strict {} 0 {} {} + iso8859-2 \U0000D800 tcl8 1A -1 {} {} + iso8859-2 \U0000D800 replace 1A -1 {} {} + iso8859-2 \U0000D800 strict {} 0 {} {} + iso8859-2 \U0000DC00 tcl8 1A -1 {} {} + iso8859-2 \U0000DC00 replace 1A -1 {} {} + iso8859-2 \U0000DC00 strict {} 0 {} {} + iso8859-2 \U00010000 tcl8 1A -1 {} {} + iso8859-2 \U00010000 replace 1A -1 {} {} + iso8859-2 \U00010000 strict {} 0 {} {} + iso8859-2 \U0010FFFF tcl8 1A -1 {} {} + iso8859-2 \U0010FFFF replace 1A -1 {} {} + iso8859-2 \U0010FFFF strict {} 0 {} {} +}; # iso8859-2 + +# +# iso8859-3 (generated from glibc-ISO_8859_3-2.1.2) + +test encoding-convertfrom-ucmCompare-iso8859-3 {Compare against ICU UCM} -body { + ucmConvertfromMismatches iso8859-3 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0024 24 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007E 7E 007F 7F 0080 80 0081 81 0082 82 0083 83 0084 84 0085 85 0086 86 0087 87 0088 88 0089 89 008A 8A 008B 8B 008C 8C 008D 8D 008E 8E 008F 8F 0090 90 0091 91 0092 92 0093 93 0094 94 0095 95 0096 96 0097 97 0098 98 0099 99 009A 9A 009B 9B 009C 9C 009D 9D 009E 9E 009F 9F 00A0 A0 00A3 A3 00A4 A4 00A7 A7 00A8 A8 00AD AD 00B0 B0 00B2 B2 00B3 B3 00B4 B4 00B5 B5 00B7 B7 00B8 B8 00BD BD 00C0 C0 00C1 C1 00C2 C2 00C4 C4 00C7 C7 00C8 C8 00C9 C9 00CA CA 00CB CB 00CC CC 00CD CD 00CE CE 00CF CF 00D1 D1 00D2 D2 00D3 D3 00D4 D4 00D6 D6 00D7 D7 00D9 D9 00DA DA 00DB DB 00DC DC 00DF DF 00E0 E0 00E1 E1 00E2 E2 00E4 E4 00E7 E7 00E8 E8 00E9 E9 00EA EA 00EB EB 00EC EC 00ED ED 00EE EE 00EF EF 00F1 F1 00F2 F2 00F3 F3 00F4 F4 00F6 F6 00F7 F7 00F9 F9 00FA FA 00FB FB 00FC FC 0108 C6 0109 E6 010A C5 010B E5 011C D8 011D F8 011E AB 011F BB 0120 D5 0121 F5 0124 A6 0125 B6 0126 A1 0127 B1 0130 A9 0131 B9 0134 AC 0135 BC 015C DE 015D FE 015E AA 015F BA 016C DD 016D FD 017B AF 017C BF 02D8 A2 02D9 FF} +} -result {} + +test encoding-convertto-ucmCompare-iso8859-3 {Compare against ICU UCM} -body { + ucmConverttoMismatches iso8859-3 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0024 24 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007E 7E 007F 7F 0080 80 0081 81 0082 82 0083 83 0084 84 0085 85 0086 86 0087 87 0088 88 0089 89 008A 8A 008B 8B 008C 8C 008D 8D 008E 8E 008F 8F 0090 90 0091 91 0092 92 0093 93 0094 94 0095 95 0096 96 0097 97 0098 98 0099 99 009A 9A 009B 9B 009C 9C 009D 9D 009E 9E 009F 9F 00A0 A0 00A3 A3 00A4 A4 00A7 A7 00A8 A8 00AD AD 00B0 B0 00B2 B2 00B3 B3 00B4 B4 00B5 B5 00B7 B7 00B8 B8 00BD BD 00C0 C0 00C1 C1 00C2 C2 00C4 C4 00C7 C7 00C8 C8 00C9 C9 00CA CA 00CB CB 00CC CC 00CD CD 00CE CE 00CF CF 00D1 D1 00D2 D2 00D3 D3 00D4 D4 00D6 D6 00D7 D7 00D9 D9 00DA DA 00DB DB 00DC DC 00DF DF 00E0 E0 00E1 E1 00E2 E2 00E4 E4 00E7 E7 00E8 E8 00E9 E9 00EA EA 00EB EB 00EC EC 00ED ED 00EE EE 00EF EF 00F1 F1 00F2 F2 00F3 F3 00F4 F4 00F6 F6 00F7 F7 00F9 F9 00FA FA 00FB FB 00FC FC 0108 C6 0109 E6 010A C5 010B E5 011C D8 011D F8 011E AB 011F BB 0120 D5 0121 F5 0124 A6 0125 B6 0126 A1 0127 B1 0130 A9 0131 B9 0134 AC 0135 BC 015C DE 015D FE 015E AA 015F BA 016C DD 016D FD 017B AF 017C BF 02D8 A2 02D9 FF} +} -result {} + +# iso8859-3 - invalid byte sequences +lappend encInvalidBytes {*}{ + iso8859-3 A5 tcl8 \U000000A5 -1 {} {} + iso8859-3 A5 replace \uFFFD -1 {} {} + iso8859-3 A5 strict {} 0 {} {} + iso8859-3 AE tcl8 \U000000AE -1 {} {} + iso8859-3 AE replace \uFFFD -1 {} {} + iso8859-3 AE strict {} 0 {} {} + iso8859-3 BE tcl8 \U000000BE -1 {} {} + iso8859-3 BE replace \uFFFD -1 {} {} + iso8859-3 BE strict {} 0 {} {} + iso8859-3 C3 tcl8 \U000000C3 -1 {} {} + iso8859-3 C3 replace \uFFFD -1 {} {} + iso8859-3 C3 strict {} 0 {} {} + iso8859-3 D0 tcl8 \U000000D0 -1 {} {} + iso8859-3 D0 replace \uFFFD -1 {} {} + iso8859-3 D0 strict {} 0 {} {} + iso8859-3 E3 tcl8 \U000000E3 -1 {} {} + iso8859-3 E3 replace \uFFFD -1 {} {} + iso8859-3 E3 strict {} 0 {} {} + iso8859-3 F0 tcl8 \U000000F0 -1 {} {} + iso8859-3 F0 replace \uFFFD -1 {} {} + iso8859-3 F0 strict {} 0 {} {} +}; # iso8859-3 + +# iso8859-3 - invalid byte sequences +lappend encUnencodableStrings {*}{ + iso8859-3 \U000000A1 tcl8 1A -1 {} {} + iso8859-3 \U000000A1 replace 1A -1 {} {} + iso8859-3 \U000000A1 strict {} 0 {} {} + iso8859-3 \U00000400 tcl8 1A -1 {} {} + iso8859-3 \U00000400 replace 1A -1 {} {} + iso8859-3 \U00000400 strict {} 0 {} {} + iso8859-3 \U0000D800 tcl8 1A -1 {} {} + iso8859-3 \U0000D800 replace 1A -1 {} {} + iso8859-3 \U0000D800 strict {} 0 {} {} + iso8859-3 \U0000DC00 tcl8 1A -1 {} {} + iso8859-3 \U0000DC00 replace 1A -1 {} {} + iso8859-3 \U0000DC00 strict {} 0 {} {} + iso8859-3 \U00010000 tcl8 1A -1 {} {} + iso8859-3 \U00010000 replace 1A -1 {} {} + iso8859-3 \U00010000 strict {} 0 {} {} + iso8859-3 \U0010FFFF tcl8 1A -1 {} {} + iso8859-3 \U0010FFFF replace 1A -1 {} {} + iso8859-3 \U0010FFFF strict {} 0 {} {} +}; # iso8859-3 + +# +# iso8859-4 (generated from glibc-ISO_8859_4-2.1.2) + +test encoding-convertfrom-ucmCompare-iso8859-4 {Compare against ICU UCM} -body { + ucmConvertfromMismatches iso8859-4 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0024 24 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007E 7E 007F 7F 0080 80 0081 81 0082 82 0083 83 0084 84 0085 85 0086 86 0087 87 0088 88 0089 89 008A 8A 008B 8B 008C 8C 008D 8D 008E 8E 008F 8F 0090 90 0091 91 0092 92 0093 93 0094 94 0095 95 0096 96 0097 97 0098 98 0099 99 009A 9A 009B 9B 009C 9C 009D 9D 009E 9E 009F 9F 00A0 A0 00A4 A4 00A7 A7 00A8 A8 00AD AD 00AF AF 00B0 B0 00B4 B4 00B8 B8 00C1 C1 00C2 C2 00C3 C3 00C4 C4 00C5 C5 00C6 C6 00C9 C9 00CB CB 00CD CD 00CE CE 00D4 D4 00D5 D5 00D6 D6 00D7 D7 00D8 D8 00DA DA 00DB DB 00DC DC 00DF DF 00E1 E1 00E2 E2 00E3 E3 00E4 E4 00E5 E5 00E6 E6 00E9 E9 00EB EB 00ED ED 00EE EE 00F4 F4 00F5 F5 00F6 F6 00F7 F7 00F8 F8 00FA FA 00FB FB 00FC FC 0100 C0 0101 E0 0104 A1 0105 B1 010C C8 010D E8 0110 D0 0111 F0 0112 AA 0113 BA 0116 CC 0117 EC 0118 CA 0119 EA 0122 AB 0123 BB 0128 A5 0129 B5 012A CF 012B EF 012E C7 012F E7 0136 D3 0137 F3 0138 A2 013B A6 013C B6 0145 D1 0146 F1 014A BD 014B BF 014C D2 014D F2 0156 A3 0157 B3 0160 A9 0161 B9 0166 AC 0167 BC 0168 DD 0169 FD 016A DE 016B FE 0172 D9 0173 F9 017D AE 017E BE 02C7 B7 02D9 FF 02DB B2} +} -result {} + +test encoding-convertto-ucmCompare-iso8859-4 {Compare against ICU UCM} -body { + ucmConverttoMismatches iso8859-4 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0024 24 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007E 7E 007F 7F 0080 80 0081 81 0082 82 0083 83 0084 84 0085 85 0086 86 0087 87 0088 88 0089 89 008A 8A 008B 8B 008C 8C 008D 8D 008E 8E 008F 8F 0090 90 0091 91 0092 92 0093 93 0094 94 0095 95 0096 96 0097 97 0098 98 0099 99 009A 9A 009B 9B 009C 9C 009D 9D 009E 9E 009F 9F 00A0 A0 00A4 A4 00A7 A7 00A8 A8 00AD AD 00AF AF 00B0 B0 00B4 B4 00B8 B8 00C1 C1 00C2 C2 00C3 C3 00C4 C4 00C5 C5 00C6 C6 00C9 C9 00CB CB 00CD CD 00CE CE 00D4 D4 00D5 D5 00D6 D6 00D7 D7 00D8 D8 00DA DA 00DB DB 00DC DC 00DF DF 00E1 E1 00E2 E2 00E3 E3 00E4 E4 00E5 E5 00E6 E6 00E9 E9 00EB EB 00ED ED 00EE EE 00F4 F4 00F5 F5 00F6 F6 00F7 F7 00F8 F8 00FA FA 00FB FB 00FC FC 0100 C0 0101 E0 0104 A1 0105 B1 010C C8 010D E8 0110 D0 0111 F0 0112 AA 0113 BA 0116 CC 0117 EC 0118 CA 0119 EA 0122 AB 0123 BB 0128 A5 0129 B5 012A CF 012B EF 012E C7 012F E7 0136 D3 0137 F3 0138 A2 013B A6 013C B6 0145 D1 0146 F1 014A BD 014B BF 014C D2 014D F2 0156 A3 0157 B3 0160 A9 0161 B9 0166 AC 0167 BC 0168 DD 0169 FD 016A DE 016B FE 0172 D9 0173 F9 017D AE 017E BE 02C7 B7 02D9 FF 02DB B2} +} -result {} + +# iso8859-4 - invalid byte sequences +lappend encInvalidBytes {*}{ +}; # iso8859-4 + +# iso8859-4 - invalid byte sequences +lappend encUnencodableStrings {*}{ + iso8859-4 \U000000A1 tcl8 1A -1 {} {} + iso8859-4 \U000000A1 replace 1A -1 {} {} + iso8859-4 \U000000A1 strict {} 0 {} {} + iso8859-4 \U00000400 tcl8 1A -1 {} {} + iso8859-4 \U00000400 replace 1A -1 {} {} + iso8859-4 \U00000400 strict {} 0 {} {} + iso8859-4 \U0000D800 tcl8 1A -1 {} {} + iso8859-4 \U0000D800 replace 1A -1 {} {} + iso8859-4 \U0000D800 strict {} 0 {} {} + iso8859-4 \U0000DC00 tcl8 1A -1 {} {} + iso8859-4 \U0000DC00 replace 1A -1 {} {} + iso8859-4 \U0000DC00 strict {} 0 {} {} + iso8859-4 \U00010000 tcl8 1A -1 {} {} + iso8859-4 \U00010000 replace 1A -1 {} {} + iso8859-4 \U00010000 strict {} 0 {} {} + iso8859-4 \U0010FFFF tcl8 1A -1 {} {} + iso8859-4 \U0010FFFF replace 1A -1 {} {} + iso8859-4 \U0010FFFF strict {} 0 {} {} +}; # iso8859-4 + +# +# iso8859-5 (generated from glibc-ISO_8859_5-2.1.2) + +test encoding-convertfrom-ucmCompare-iso8859-5 {Compare against ICU UCM} -body { + ucmConvertfromMismatches iso8859-5 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0024 24 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007E 7E 007F 7F 0080 80 0081 81 0082 82 0083 83 0084 84 0085 85 0086 86 0087 87 0088 88 0089 89 008A 8A 008B 8B 008C 8C 008D 8D 008E 8E 008F 8F 0090 90 0091 91 0092 92 0093 93 0094 94 0095 95 0096 96 0097 97 0098 98 0099 99 009A 9A 009B 9B 009C 9C 009D 9D 009E 9E 009F 9F 00A0 A0 00A7 FD 00AD AD 0401 A1 0402 A2 0403 A3 0404 A4 0405 A5 0406 A6 0407 A7 0408 A8 0409 A9 040A AA 040B AB 040C AC 040E AE 040F AF 0410 B0 0411 B1 0412 B2 0413 B3 0414 B4 0415 B5 0416 B6 0417 B7 0418 B8 0419 B9 041A BA 041B BB 041C BC 041D BD 041E BE 041F BF 0420 C0 0421 C1 0422 C2 0423 C3 0424 C4 0425 C5 0426 C6 0427 C7 0428 C8 0429 C9 042A CA 042B CB 042C CC 042D CD 042E CE 042F CF 0430 D0 0431 D1 0432 D2 0433 D3 0434 D4 0435 D5 0436 D6 0437 D7 0438 D8 0439 D9 043A DA 043B DB 043C DC 043D DD 043E DE 043F DF 0440 E0 0441 E1 0442 E2 0443 E3 0444 E4 0445 E5 0446 E6 0447 E7 0448 E8 0449 E9 044A EA 044B EB 044C EC 044D ED 044E EE 044F EF 0451 F1 0452 F2 0453 F3 0454 F4 0455 F5 0456 F6 0457 F7 0458 F8 0459 F9 045A FA 045B FB 045C FC 045E FE 045F FF 2116 F0} +} -result {} + +test encoding-convertto-ucmCompare-iso8859-5 {Compare against ICU UCM} -body { + ucmConverttoMismatches iso8859-5 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0024 24 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007E 7E 007F 7F 0080 80 0081 81 0082 82 0083 83 0084 84 0085 85 0086 86 0087 87 0088 88 0089 89 008A 8A 008B 8B 008C 8C 008D 8D 008E 8E 008F 8F 0090 90 0091 91 0092 92 0093 93 0094 94 0095 95 0096 96 0097 97 0098 98 0099 99 009A 9A 009B 9B 009C 9C 009D 9D 009E 9E 009F 9F 00A0 A0 00A7 FD 00AD AD 0401 A1 0402 A2 0403 A3 0404 A4 0405 A5 0406 A6 0407 A7 0408 A8 0409 A9 040A AA 040B AB 040C AC 040E AE 040F AF 0410 B0 0411 B1 0412 B2 0413 B3 0414 B4 0415 B5 0416 B6 0417 B7 0418 B8 0419 B9 041A BA 041B BB 041C BC 041D BD 041E BE 041F BF 0420 C0 0421 C1 0422 C2 0423 C3 0424 C4 0425 C5 0426 C6 0427 C7 0428 C8 0429 C9 042A CA 042B CB 042C CC 042D CD 042E CE 042F CF 0430 D0 0431 D1 0432 D2 0433 D3 0434 D4 0435 D5 0436 D6 0437 D7 0438 D8 0439 D9 043A DA 043B DB 043C DC 043D DD 043E DE 043F DF 0440 E0 0441 E1 0442 E2 0443 E3 0444 E4 0445 E5 0446 E6 0447 E7 0448 E8 0449 E9 044A EA 044B EB 044C EC 044D ED 044E EE 044F EF 0451 F1 0452 F2 0453 F3 0454 F4 0455 F5 0456 F6 0457 F7 0458 F8 0459 F9 045A FA 045B FB 045C FC 045E FE 045F FF 2116 F0} +} -result {} + +# iso8859-5 - invalid byte sequences +lappend encInvalidBytes {*}{ +}; # iso8859-5 + +# iso8859-5 - invalid byte sequences +lappend encUnencodableStrings {*}{ + iso8859-5 \U000000A1 tcl8 1A -1 {} {} + iso8859-5 \U000000A1 replace 1A -1 {} {} + iso8859-5 \U000000A1 strict {} 0 {} {} + iso8859-5 \U00000400 tcl8 1A -1 {} {} + iso8859-5 \U00000400 replace 1A -1 {} {} + iso8859-5 \U00000400 strict {} 0 {} {} + iso8859-5 \U0000D800 tcl8 1A -1 {} {} + iso8859-5 \U0000D800 replace 1A -1 {} {} + iso8859-5 \U0000D800 strict {} 0 {} {} + iso8859-5 \U0000DC00 tcl8 1A -1 {} {} + iso8859-5 \U0000DC00 replace 1A -1 {} {} + iso8859-5 \U0000DC00 strict {} 0 {} {} + iso8859-5 \U00010000 tcl8 1A -1 {} {} + iso8859-5 \U00010000 replace 1A -1 {} {} + iso8859-5 \U00010000 strict {} 0 {} {} + iso8859-5 \U0010FFFF tcl8 1A -1 {} {} + iso8859-5 \U0010FFFF replace 1A -1 {} {} + iso8859-5 \U0010FFFF strict {} 0 {} {} +}; # iso8859-5 + +# +# iso8859-6 (generated from glibc-ISO_8859_6-2.1.2) + +test encoding-convertfrom-ucmCompare-iso8859-6 {Compare against ICU UCM} -body { + ucmConvertfromMismatches iso8859-6 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0024 24 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007E 7E 007F 7F 0080 80 0081 81 0082 82 0083 83 0084 84 0085 85 0086 86 0087 87 0088 88 0089 89 008A 8A 008B 8B 008C 8C 008D 8D 008E 8E 008F 8F 0090 90 0091 91 0092 92 0093 93 0094 94 0095 95 0096 96 0097 97 0098 98 0099 99 009A 9A 009B 9B 009C 9C 009D 9D 009E 9E 009F 9F 00A0 A0 00A4 A4 00AD AD 060C AC 061B BB 061F BF 0621 C1 0622 C2 0623 C3 0624 C4 0625 C5 0626 C6 0627 C7 0628 C8 0629 C9 062A CA 062B CB 062C CC 062D CD 062E CE 062F CF 0630 D0 0631 D1 0632 D2 0633 D3 0634 D4 0635 D5 0636 D6 0637 D7 0638 D8 0639 D9 063A DA 0640 E0 0641 E1 0642 E2 0643 E3 0644 E4 0645 E5 0646 E6 0647 E7 0648 E8 0649 E9 064A EA 064B EB 064C EC 064D ED 064E EE 064F EF 0650 F0 0651 F1 0652 F2} +} -result {} + +test encoding-convertto-ucmCompare-iso8859-6 {Compare against ICU UCM} -body { + ucmConverttoMismatches iso8859-6 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0024 24 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007E 7E 007F 7F 0080 80 0081 81 0082 82 0083 83 0084 84 0085 85 0086 86 0087 87 0088 88 0089 89 008A 8A 008B 8B 008C 8C 008D 8D 008E 8E 008F 8F 0090 90 0091 91 0092 92 0093 93 0094 94 0095 95 0096 96 0097 97 0098 98 0099 99 009A 9A 009B 9B 009C 9C 009D 9D 009E 9E 009F 9F 00A0 A0 00A4 A4 00AD AD 060C AC 061B BB 061F BF 0621 C1 0622 C2 0623 C3 0624 C4 0625 C5 0626 C6 0627 C7 0628 C8 0629 C9 062A CA 062B CB 062C CC 062D CD 062E CE 062F CF 0630 D0 0631 D1 0632 D2 0633 D3 0634 D4 0635 D5 0636 D6 0637 D7 0638 D8 0639 D9 063A DA 0640 E0 0641 E1 0642 E2 0643 E3 0644 E4 0645 E5 0646 E6 0647 E7 0648 E8 0649 E9 064A EA 064B EB 064C EC 064D ED 064E EE 064F EF 0650 F0 0651 F1 0652 F2} +} -result {} + +# iso8859-6 - invalid byte sequences +lappend encInvalidBytes {*}{ + iso8859-6 A1 tcl8 \U000000A1 -1 {} {} + iso8859-6 A1 replace \uFFFD -1 {} {} + iso8859-6 A1 strict {} 0 {} {} + iso8859-6 A2 tcl8 \U000000A2 -1 {} {} + iso8859-6 A2 replace \uFFFD -1 {} {} + iso8859-6 A2 strict {} 0 {} {} + iso8859-6 A3 tcl8 \U000000A3 -1 {} {} + iso8859-6 A3 replace \uFFFD -1 {} {} + iso8859-6 A3 strict {} 0 {} {} + iso8859-6 A5 tcl8 \U000000A5 -1 {} {} + iso8859-6 A5 replace \uFFFD -1 {} {} + iso8859-6 A5 strict {} 0 {} {} + iso8859-6 A6 tcl8 \U000000A6 -1 {} {} + iso8859-6 A6 replace \uFFFD -1 {} {} + iso8859-6 A6 strict {} 0 {} {} + iso8859-6 A7 tcl8 \U000000A7 -1 {} {} + iso8859-6 A7 replace \uFFFD -1 {} {} + iso8859-6 A7 strict {} 0 {} {} + iso8859-6 A8 tcl8 \U000000A8 -1 {} {} + iso8859-6 A8 replace \uFFFD -1 {} {} + iso8859-6 A8 strict {} 0 {} {} + iso8859-6 A9 tcl8 \U000000A9 -1 {} {} + iso8859-6 A9 replace \uFFFD -1 {} {} + iso8859-6 A9 strict {} 0 {} {} + iso8859-6 AA tcl8 \U000000AA -1 {} {} + iso8859-6 AA replace \uFFFD -1 {} {} + iso8859-6 AA strict {} 0 {} {} + iso8859-6 AB tcl8 \U000000AB -1 {} {} + iso8859-6 AB replace \uFFFD -1 {} {} + iso8859-6 AB strict {} 0 {} {} + iso8859-6 AE tcl8 \U000000AE -1 {} {} + iso8859-6 AE replace \uFFFD -1 {} {} + iso8859-6 AE strict {} 0 {} {} + iso8859-6 AF tcl8 \U000000AF -1 {} {} + iso8859-6 AF replace \uFFFD -1 {} {} + iso8859-6 AF strict {} 0 {} {} + iso8859-6 B0 tcl8 \U000000B0 -1 {} {} + iso8859-6 B0 replace \uFFFD -1 {} {} + iso8859-6 B0 strict {} 0 {} {} + iso8859-6 B1 tcl8 \U000000B1 -1 {} {} + iso8859-6 B1 replace \uFFFD -1 {} {} + iso8859-6 B1 strict {} 0 {} {} + iso8859-6 B2 tcl8 \U000000B2 -1 {} {} + iso8859-6 B2 replace \uFFFD -1 {} {} + iso8859-6 B2 strict {} 0 {} {} + iso8859-6 B3 tcl8 \U000000B3 -1 {} {} + iso8859-6 B3 replace \uFFFD -1 {} {} + iso8859-6 B3 strict {} 0 {} {} + iso8859-6 B4 tcl8 \U000000B4 -1 {} {} + iso8859-6 B4 replace \uFFFD -1 {} {} + iso8859-6 B4 strict {} 0 {} {} + iso8859-6 B5 tcl8 \U000000B5 -1 {} {} + iso8859-6 B5 replace \uFFFD -1 {} {} + iso8859-6 B5 strict {} 0 {} {} + iso8859-6 B6 tcl8 \U000000B6 -1 {} {} + iso8859-6 B6 replace \uFFFD -1 {} {} + iso8859-6 B6 strict {} 0 {} {} + iso8859-6 B7 tcl8 \U000000B7 -1 {} {} + iso8859-6 B7 replace \uFFFD -1 {} {} + iso8859-6 B7 strict {} 0 {} {} + iso8859-6 B8 tcl8 \U000000B8 -1 {} {} + iso8859-6 B8 replace \uFFFD -1 {} {} + iso8859-6 B8 strict {} 0 {} {} + iso8859-6 B9 tcl8 \U000000B9 -1 {} {} + iso8859-6 B9 replace \uFFFD -1 {} {} + iso8859-6 B9 strict {} 0 {} {} + iso8859-6 BA tcl8 \U000000BA -1 {} {} + iso8859-6 BA replace \uFFFD -1 {} {} + iso8859-6 BA strict {} 0 {} {} + iso8859-6 BC tcl8 \U000000BC -1 {} {} + iso8859-6 BC replace \uFFFD -1 {} {} + iso8859-6 BC strict {} 0 {} {} + iso8859-6 BD tcl8 \U000000BD -1 {} {} + iso8859-6 BD replace \uFFFD -1 {} {} + iso8859-6 BD strict {} 0 {} {} + iso8859-6 BE tcl8 \U000000BE -1 {} {} + iso8859-6 BE replace \uFFFD -1 {} {} + iso8859-6 BE strict {} 0 {} {} + iso8859-6 C0 tcl8 \U000000C0 -1 {} {} + iso8859-6 C0 replace \uFFFD -1 {} {} + iso8859-6 C0 strict {} 0 {} {} + iso8859-6 DB tcl8 \U000000DB -1 {} {} + iso8859-6 DB replace \uFFFD -1 {} {} + iso8859-6 DB strict {} 0 {} {} + iso8859-6 DC tcl8 \U000000DC -1 {} {} + iso8859-6 DC replace \uFFFD -1 {} {} + iso8859-6 DC strict {} 0 {} {} + iso8859-6 DD tcl8 \U000000DD -1 {} {} + iso8859-6 DD replace \uFFFD -1 {} {} + iso8859-6 DD strict {} 0 {} {} + iso8859-6 DE tcl8 \U000000DE -1 {} {} + iso8859-6 DE replace \uFFFD -1 {} {} + iso8859-6 DE strict {} 0 {} {} + iso8859-6 DF tcl8 \U000000DF -1 {} {} + iso8859-6 DF replace \uFFFD -1 {} {} + iso8859-6 DF strict {} 0 {} {} + iso8859-6 F3 tcl8 \U000000F3 -1 {} {} + iso8859-6 F3 replace \uFFFD -1 {} {} + iso8859-6 F3 strict {} 0 {} {} + iso8859-6 F4 tcl8 \U000000F4 -1 {} {} + iso8859-6 F4 replace \uFFFD -1 {} {} + iso8859-6 F4 strict {} 0 {} {} + iso8859-6 F5 tcl8 \U000000F5 -1 {} {} + iso8859-6 F5 replace \uFFFD -1 {} {} + iso8859-6 F5 strict {} 0 {} {} + iso8859-6 F6 tcl8 \U000000F6 -1 {} {} + iso8859-6 F6 replace \uFFFD -1 {} {} + iso8859-6 F6 strict {} 0 {} {} + iso8859-6 F7 tcl8 \U000000F7 -1 {} {} + iso8859-6 F7 replace \uFFFD -1 {} {} + iso8859-6 F7 strict {} 0 {} {} + iso8859-6 F8 tcl8 \U000000F8 -1 {} {} + iso8859-6 F8 replace \uFFFD -1 {} {} + iso8859-6 F8 strict {} 0 {} {} + iso8859-6 F9 tcl8 \U000000F9 -1 {} {} + iso8859-6 F9 replace \uFFFD -1 {} {} + iso8859-6 F9 strict {} 0 {} {} + iso8859-6 FA tcl8 \U000000FA -1 {} {} + iso8859-6 FA replace \uFFFD -1 {} {} + iso8859-6 FA strict {} 0 {} {} + iso8859-6 FB tcl8 \U000000FB -1 {} {} + iso8859-6 FB replace \uFFFD -1 {} {} + iso8859-6 FB strict {} 0 {} {} + iso8859-6 FC tcl8 \U000000FC -1 {} {} + iso8859-6 FC replace \uFFFD -1 {} {} + iso8859-6 FC strict {} 0 {} {} + iso8859-6 FD tcl8 \U000000FD -1 {} {} + iso8859-6 FD replace \uFFFD -1 {} {} + iso8859-6 FD strict {} 0 {} {} + iso8859-6 FE tcl8 \U000000FE -1 {} {} + iso8859-6 FE replace \uFFFD -1 {} {} + iso8859-6 FE strict {} 0 {} {} + iso8859-6 FF tcl8 \U000000FF -1 {} {} + iso8859-6 FF replace \uFFFD -1 {} {} + iso8859-6 FF strict {} 0 {} {} +}; # iso8859-6 + +# iso8859-6 - invalid byte sequences +lappend encUnencodableStrings {*}{ + iso8859-6 \U000000A1 tcl8 1A -1 {} {} + iso8859-6 \U000000A1 replace 1A -1 {} {} + iso8859-6 \U000000A1 strict {} 0 {} {} + iso8859-6 \U00000400 tcl8 1A -1 {} {} + iso8859-6 \U00000400 replace 1A -1 {} {} + iso8859-6 \U00000400 strict {} 0 {} {} + iso8859-6 \U0000D800 tcl8 1A -1 {} {} + iso8859-6 \U0000D800 replace 1A -1 {} {} + iso8859-6 \U0000D800 strict {} 0 {} {} + iso8859-6 \U0000DC00 tcl8 1A -1 {} {} + iso8859-6 \U0000DC00 replace 1A -1 {} {} + iso8859-6 \U0000DC00 strict {} 0 {} {} + iso8859-6 \U00010000 tcl8 1A -1 {} {} + iso8859-6 \U00010000 replace 1A -1 {} {} + iso8859-6 \U00010000 strict {} 0 {} {} + iso8859-6 \U0010FFFF tcl8 1A -1 {} {} + iso8859-6 \U0010FFFF replace 1A -1 {} {} + iso8859-6 \U0010FFFF strict {} 0 {} {} +}; # iso8859-6 + +# +# iso8859-7 (generated from glibc-ISO_8859_7-2.3.3) + +test encoding-convertfrom-ucmCompare-iso8859-7 {Compare against ICU UCM} -body { + ucmConvertfromMismatches iso8859-7 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0024 24 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007E 7E 007F 7F 0080 80 0081 81 0082 82 0083 83 0084 84 0085 85 0086 86 0087 87 0088 88 0089 89 008A 8A 008B 8B 008C 8C 008D 8D 008E 8E 008F 8F 0090 90 0091 91 0092 92 0093 93 0094 94 0095 95 0096 96 0097 97 0098 98 0099 99 009A 9A 009B 9B 009C 9C 009D 9D 009E 9E 009F 9F 00A0 A0 00A3 A3 00A6 A6 00A7 A7 00A8 A8 00A9 A9 00AB AB 00AC AC 00AD AD 00B0 B0 00B1 B1 00B2 B2 00B3 B3 00B7 B7 00BB BB 00BD BD 037A AA 0384 B4 0385 B5 0386 B6 0388 B8 0389 B9 038A BA 038C BC 038E BE 038F BF 0390 C0 0391 C1 0392 C2 0393 C3 0394 C4 0395 C5 0396 C6 0397 C7 0398 C8 0399 C9 039A CA 039B CB 039C CC 039D CD 039E CE 039F CF 03A0 D0 03A1 D1 03A3 D3 03A4 D4 03A5 D5 03A6 D6 03A7 D7 03A8 D8 03A9 D9 03AA DA 03AB DB 03AC DC 03AD DD 03AE DE 03AF DF 03B0 E0 03B1 E1 03B2 E2 03B3 E3 03B4 E4 03B5 E5 03B6 E6 03B7 E7 03B8 E8 03B9 E9 03BA EA 03BB EB 03BC EC 03BD ED 03BE EE 03BF EF 03C0 F0 03C1 F1 03C2 F2 03C3 F3 03C4 F4 03C5 F5 03C6 F6 03C7 F7 03C8 F8 03C9 F9 03CA FA 03CB FB 03CC FC 03CD FD 03CE FE 2015 AF 2018 A1 2019 A2 20AC A4 20AF A5} +} -result {} + +test encoding-convertto-ucmCompare-iso8859-7 {Compare against ICU UCM} -body { + ucmConverttoMismatches iso8859-7 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0024 24 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007E 7E 007F 7F 0080 80 0081 81 0082 82 0083 83 0084 84 0085 85 0086 86 0087 87 0088 88 0089 89 008A 8A 008B 8B 008C 8C 008D 8D 008E 8E 008F 8F 0090 90 0091 91 0092 92 0093 93 0094 94 0095 95 0096 96 0097 97 0098 98 0099 99 009A 9A 009B 9B 009C 9C 009D 9D 009E 9E 009F 9F 00A0 A0 00A3 A3 00A6 A6 00A7 A7 00A8 A8 00A9 A9 00AB AB 00AC AC 00AD AD 00B0 B0 00B1 B1 00B2 B2 00B3 B3 00B7 B7 00BB BB 00BD BD 037A AA 0384 B4 0385 B5 0386 B6 0388 B8 0389 B9 038A BA 038C BC 038E BE 038F BF 0390 C0 0391 C1 0392 C2 0393 C3 0394 C4 0395 C5 0396 C6 0397 C7 0398 C8 0399 C9 039A CA 039B CB 039C CC 039D CD 039E CE 039F CF 03A0 D0 03A1 D1 03A3 D3 03A4 D4 03A5 D5 03A6 D6 03A7 D7 03A8 D8 03A9 D9 03AA DA 03AB DB 03AC DC 03AD DD 03AE DE 03AF DF 03B0 E0 03B1 E1 03B2 E2 03B3 E3 03B4 E4 03B5 E5 03B6 E6 03B7 E7 03B8 E8 03B9 E9 03BA EA 03BB EB 03BC EC 03BD ED 03BE EE 03BF EF 03C0 F0 03C1 F1 03C2 F2 03C3 F3 03C4 F4 03C5 F5 03C6 F6 03C7 F7 03C8 F8 03C9 F9 03CA FA 03CB FB 03CC FC 03CD FD 03CE FE 2015 AF 2018 A1 2019 A2 20AC A4 20AF A5} +} -result {} + +# iso8859-7 - invalid byte sequences +lappend encInvalidBytes {*}{ + iso8859-7 AE tcl8 \U000000AE -1 {} {} + iso8859-7 AE replace \uFFFD -1 {} {} + iso8859-7 AE strict {} 0 {} {} + iso8859-7 D2 tcl8 \U000000D2 -1 {} {} + iso8859-7 D2 replace \uFFFD -1 {} {} + iso8859-7 D2 strict {} 0 {} {} + iso8859-7 FF tcl8 \U000000FF -1 {} {} + iso8859-7 FF replace \uFFFD -1 {} {} + iso8859-7 FF strict {} 0 {} {} +}; # iso8859-7 + +# iso8859-7 - invalid byte sequences +lappend encUnencodableStrings {*}{ + iso8859-7 \U000000A1 tcl8 1A -1 {} {} + iso8859-7 \U000000A1 replace 1A -1 {} {} + iso8859-7 \U000000A1 strict {} 0 {} {} + iso8859-7 \U00000400 tcl8 1A -1 {} {} + iso8859-7 \U00000400 replace 1A -1 {} {} + iso8859-7 \U00000400 strict {} 0 {} {} + iso8859-7 \U0000D800 tcl8 1A -1 {} {} + iso8859-7 \U0000D800 replace 1A -1 {} {} + iso8859-7 \U0000D800 strict {} 0 {} {} + iso8859-7 \U0000DC00 tcl8 1A -1 {} {} + iso8859-7 \U0000DC00 replace 1A -1 {} {} + iso8859-7 \U0000DC00 strict {} 0 {} {} + iso8859-7 \U00010000 tcl8 1A -1 {} {} + iso8859-7 \U00010000 replace 1A -1 {} {} + iso8859-7 \U00010000 strict {} 0 {} {} + iso8859-7 \U0010FFFF tcl8 1A -1 {} {} + iso8859-7 \U0010FFFF replace 1A -1 {} {} + iso8859-7 \U0010FFFF strict {} 0 {} {} +}; # iso8859-7 + +# +# iso8859-8 (generated from glibc-ISO_8859_8-2.3.3) + +test encoding-convertfrom-ucmCompare-iso8859-8 {Compare against ICU UCM} -body { + ucmConvertfromMismatches iso8859-8 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0024 24 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007E 7E 007F 7F 0080 80 0081 81 0082 82 0083 83 0084 84 0085 85 0086 86 0087 87 0088 88 0089 89 008A 8A 008B 8B 008C 8C 008D 8D 008E 8E 008F 8F 0090 90 0091 91 0092 92 0093 93 0094 94 0095 95 0096 96 0097 97 0098 98 0099 99 009A 9A 009B 9B 009C 9C 009D 9D 009E 9E 009F 9F 00A0 A0 00A2 A2 00A3 A3 00A4 A4 00A5 A5 00A6 A6 00A7 A7 00A8 A8 00A9 A9 00AB AB 00AC AC 00AD AD 00AE AE 00AF AF 00B0 B0 00B1 B1 00B2 B2 00B3 B3 00B4 B4 00B5 B5 00B6 B6 00B7 B7 00B8 B8 00B9 B9 00BB BB 00BC BC 00BD BD 00BE BE 00D7 AA 00F7 BA 05D0 E0 05D1 E1 05D2 E2 05D3 E3 05D4 E4 05D5 E5 05D6 E6 05D7 E7 05D8 E8 05D9 E9 05DA EA 05DB EB 05DC EC 05DD ED 05DE EE 05DF EF 05E0 F0 05E1 F1 05E2 F2 05E3 F3 05E4 F4 05E5 F5 05E6 F6 05E7 F7 05E8 F8 05E9 F9 05EA FA 200E FD 200F FE 2017 DF} +} -result {} + +test encoding-convertto-ucmCompare-iso8859-8 {Compare against ICU UCM} -body { + ucmConverttoMismatches iso8859-8 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0024 24 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007E 7E 007F 7F 0080 80 0081 81 0082 82 0083 83 0084 84 0085 85 0086 86 0087 87 0088 88 0089 89 008A 8A 008B 8B 008C 8C 008D 8D 008E 8E 008F 8F 0090 90 0091 91 0092 92 0093 93 0094 94 0095 95 0096 96 0097 97 0098 98 0099 99 009A 9A 009B 9B 009C 9C 009D 9D 009E 9E 009F 9F 00A0 A0 00A2 A2 00A3 A3 00A4 A4 00A5 A5 00A6 A6 00A7 A7 00A8 A8 00A9 A9 00AB AB 00AC AC 00AD AD 00AE AE 00AF AF 00B0 B0 00B1 B1 00B2 B2 00B3 B3 00B4 B4 00B5 B5 00B6 B6 00B7 B7 00B8 B8 00B9 B9 00BB BB 00BC BC 00BD BD 00BE BE 00D7 AA 00F7 BA 05D0 E0 05D1 E1 05D2 E2 05D3 E3 05D4 E4 05D5 E5 05D6 E6 05D7 E7 05D8 E8 05D9 E9 05DA EA 05DB EB 05DC EC 05DD ED 05DE EE 05DF EF 05E0 F0 05E1 F1 05E2 F2 05E3 F3 05E4 F4 05E5 F5 05E6 F6 05E7 F7 05E8 F8 05E9 F9 05EA FA 200E FD 200F FE 2017 DF} +} -result {} + +# iso8859-8 - invalid byte sequences +lappend encInvalidBytes {*}{ + iso8859-8 A1 tcl8 \U000000A1 -1 {} {} + iso8859-8 A1 replace \uFFFD -1 {} {} + iso8859-8 A1 strict {} 0 {} {} + iso8859-8 BF tcl8 \U000000BF -1 {} {} + iso8859-8 BF replace \uFFFD -1 {} {} + iso8859-8 BF strict {} 0 {} {} + iso8859-8 C0 tcl8 \U000000C0 -1 {} {} + iso8859-8 C0 replace \uFFFD -1 {} {} + iso8859-8 C0 strict {} 0 {} {} + iso8859-8 C1 tcl8 \U000000C1 -1 {} {} + iso8859-8 C1 replace \uFFFD -1 {} {} + iso8859-8 C1 strict {} 0 {} {} + iso8859-8 C2 tcl8 \U000000C2 -1 {} {} + iso8859-8 C2 replace \uFFFD -1 {} {} + iso8859-8 C2 strict {} 0 {} {} + iso8859-8 C3 tcl8 \U000000C3 -1 {} {} + iso8859-8 C3 replace \uFFFD -1 {} {} + iso8859-8 C3 strict {} 0 {} {} + iso8859-8 C4 tcl8 \U000000C4 -1 {} {} + iso8859-8 C4 replace \uFFFD -1 {} {} + iso8859-8 C4 strict {} 0 {} {} + iso8859-8 C5 tcl8 \U000000C5 -1 {} {} + iso8859-8 C5 replace \uFFFD -1 {} {} + iso8859-8 C5 strict {} 0 {} {} + iso8859-8 C6 tcl8 \U000000C6 -1 {} {} + iso8859-8 C6 replace \uFFFD -1 {} {} + iso8859-8 C6 strict {} 0 {} {} + iso8859-8 C7 tcl8 \U000000C7 -1 {} {} + iso8859-8 C7 replace \uFFFD -1 {} {} + iso8859-8 C7 strict {} 0 {} {} + iso8859-8 C8 tcl8 \U000000C8 -1 {} {} + iso8859-8 C8 replace \uFFFD -1 {} {} + iso8859-8 C8 strict {} 0 {} {} + iso8859-8 C9 tcl8 \U000000C9 -1 {} {} + iso8859-8 C9 replace \uFFFD -1 {} {} + iso8859-8 C9 strict {} 0 {} {} + iso8859-8 CA tcl8 \U000000CA -1 {} {} + iso8859-8 CA replace \uFFFD -1 {} {} + iso8859-8 CA strict {} 0 {} {} + iso8859-8 CB tcl8 \U000000CB -1 {} {} + iso8859-8 CB replace \uFFFD -1 {} {} + iso8859-8 CB strict {} 0 {} {} + iso8859-8 CC tcl8 \U000000CC -1 {} {} + iso8859-8 CC replace \uFFFD -1 {} {} + iso8859-8 CC strict {} 0 {} {} + iso8859-8 CD tcl8 \U000000CD -1 {} {} + iso8859-8 CD replace \uFFFD -1 {} {} + iso8859-8 CD strict {} 0 {} {} + iso8859-8 CE tcl8 \U000000CE -1 {} {} + iso8859-8 CE replace \uFFFD -1 {} {} + iso8859-8 CE strict {} 0 {} {} + iso8859-8 CF tcl8 \U000000CF -1 {} {} + iso8859-8 CF replace \uFFFD -1 {} {} + iso8859-8 CF strict {} 0 {} {} + iso8859-8 D0 tcl8 \U000000D0 -1 {} {} + iso8859-8 D0 replace \uFFFD -1 {} {} + iso8859-8 D0 strict {} 0 {} {} + iso8859-8 D1 tcl8 \U000000D1 -1 {} {} + iso8859-8 D1 replace \uFFFD -1 {} {} + iso8859-8 D1 strict {} 0 {} {} + iso8859-8 D2 tcl8 \U000000D2 -1 {} {} + iso8859-8 D2 replace \uFFFD -1 {} {} + iso8859-8 D2 strict {} 0 {} {} + iso8859-8 D3 tcl8 \U000000D3 -1 {} {} + iso8859-8 D3 replace \uFFFD -1 {} {} + iso8859-8 D3 strict {} 0 {} {} + iso8859-8 D4 tcl8 \U000000D4 -1 {} {} + iso8859-8 D4 replace \uFFFD -1 {} {} + iso8859-8 D4 strict {} 0 {} {} + iso8859-8 D5 tcl8 \U000000D5 -1 {} {} + iso8859-8 D5 replace \uFFFD -1 {} {} + iso8859-8 D5 strict {} 0 {} {} + iso8859-8 D6 tcl8 \U000000D6 -1 {} {} + iso8859-8 D6 replace \uFFFD -1 {} {} + iso8859-8 D6 strict {} 0 {} {} + iso8859-8 D7 tcl8 \U000000D7 -1 {} {} + iso8859-8 D7 replace \uFFFD -1 {} {} + iso8859-8 D7 strict {} 0 {} {} + iso8859-8 D8 tcl8 \U000000D8 -1 {} {} + iso8859-8 D8 replace \uFFFD -1 {} {} + iso8859-8 D8 strict {} 0 {} {} + iso8859-8 D9 tcl8 \U000000D9 -1 {} {} + iso8859-8 D9 replace \uFFFD -1 {} {} + iso8859-8 D9 strict {} 0 {} {} + iso8859-8 DA tcl8 \U000000DA -1 {} {} + iso8859-8 DA replace \uFFFD -1 {} {} + iso8859-8 DA strict {} 0 {} {} + iso8859-8 DB tcl8 \U000000DB -1 {} {} + iso8859-8 DB replace \uFFFD -1 {} {} + iso8859-8 DB strict {} 0 {} {} + iso8859-8 DC tcl8 \U000000DC -1 {} {} + iso8859-8 DC replace \uFFFD -1 {} {} + iso8859-8 DC strict {} 0 {} {} + iso8859-8 DD tcl8 \U000000DD -1 {} {} + iso8859-8 DD replace \uFFFD -1 {} {} + iso8859-8 DD strict {} 0 {} {} + iso8859-8 DE tcl8 \U000000DE -1 {} {} + iso8859-8 DE replace \uFFFD -1 {} {} + iso8859-8 DE strict {} 0 {} {} + iso8859-8 FB tcl8 \U000000FB -1 {} {} + iso8859-8 FB replace \uFFFD -1 {} {} + iso8859-8 FB strict {} 0 {} {} + iso8859-8 FC tcl8 \U000000FC -1 {} {} + iso8859-8 FC replace \uFFFD -1 {} {} + iso8859-8 FC strict {} 0 {} {} + iso8859-8 FF tcl8 \U000000FF -1 {} {} + iso8859-8 FF replace \uFFFD -1 {} {} + iso8859-8 FF strict {} 0 {} {} +}; # iso8859-8 + +# iso8859-8 - invalid byte sequences +lappend encUnencodableStrings {*}{ + iso8859-8 \U000000A1 tcl8 1A -1 {} {} + iso8859-8 \U000000A1 replace 1A -1 {} {} + iso8859-8 \U000000A1 strict {} 0 {} {} + iso8859-8 \U00000400 tcl8 1A -1 {} {} + iso8859-8 \U00000400 replace 1A -1 {} {} + iso8859-8 \U00000400 strict {} 0 {} {} + iso8859-8 \U0000D800 tcl8 1A -1 {} {} + iso8859-8 \U0000D800 replace 1A -1 {} {} + iso8859-8 \U0000D800 strict {} 0 {} {} + iso8859-8 \U0000DC00 tcl8 1A -1 {} {} + iso8859-8 \U0000DC00 replace 1A -1 {} {} + iso8859-8 \U0000DC00 strict {} 0 {} {} + iso8859-8 \U00010000 tcl8 1A -1 {} {} + iso8859-8 \U00010000 replace 1A -1 {} {} + iso8859-8 \U00010000 strict {} 0 {} {} + iso8859-8 \U0010FFFF tcl8 1A -1 {} {} + iso8859-8 \U0010FFFF replace 1A -1 {} {} + iso8859-8 \U0010FFFF strict {} 0 {} {} +}; # iso8859-8 + +# +# iso8859-9 (generated from glibc-ISO_8859_9-2.1.2) + +test encoding-convertfrom-ucmCompare-iso8859-9 {Compare against ICU UCM} -body { + ucmConvertfromMismatches iso8859-9 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0024 24 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007E 7E 007F 7F 0080 80 0081 81 0082 82 0083 83 0084 84 0085 85 0086 86 0087 87 0088 88 0089 89 008A 8A 008B 8B 008C 8C 008D 8D 008E 8E 008F 8F 0090 90 0091 91 0092 92 0093 93 0094 94 0095 95 0096 96 0097 97 0098 98 0099 99 009A 9A 009B 9B 009C 9C 009D 9D 009E 9E 009F 9F 00A0 A0 00A1 A1 00A2 A2 00A3 A3 00A4 A4 00A5 A5 00A6 A6 00A7 A7 00A8 A8 00A9 A9 00AA AA 00AB AB 00AC AC 00AD AD 00AE AE 00AF AF 00B0 B0 00B1 B1 00B2 B2 00B3 B3 00B4 B4 00B5 B5 00B6 B6 00B7 B7 00B8 B8 00B9 B9 00BA BA 00BB BB 00BC BC 00BD BD 00BE BE 00BF BF 00C0 C0 00C1 C1 00C2 C2 00C3 C3 00C4 C4 00C5 C5 00C6 C6 00C7 C7 00C8 C8 00C9 C9 00CA CA 00CB CB 00CC CC 00CD CD 00CE CE 00CF CF 00D1 D1 00D2 D2 00D3 D3 00D4 D4 00D5 D5 00D6 D6 00D7 D7 00D8 D8 00D9 D9 00DA DA 00DB DB 00DC DC 00DF DF 00E0 E0 00E1 E1 00E2 E2 00E3 E3 00E4 E4 00E5 E5 00E6 E6 00E7 E7 00E8 E8 00E9 E9 00EA EA 00EB EB 00EC EC 00ED ED 00EE EE 00EF EF 00F1 F1 00F2 F2 00F3 F3 00F4 F4 00F5 F5 00F6 F6 00F7 F7 00F8 F8 00F9 F9 00FA FA 00FB FB 00FC FC 00FF FF 011E D0 011F F0 0130 DD 0131 FD 015E DE 015F FE} +} -result {} + +test encoding-convertto-ucmCompare-iso8859-9 {Compare against ICU UCM} -body { + ucmConverttoMismatches iso8859-9 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0024 24 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007E 7E 007F 7F 0080 80 0081 81 0082 82 0083 83 0084 84 0085 85 0086 86 0087 87 0088 88 0089 89 008A 8A 008B 8B 008C 8C 008D 8D 008E 8E 008F 8F 0090 90 0091 91 0092 92 0093 93 0094 94 0095 95 0096 96 0097 97 0098 98 0099 99 009A 9A 009B 9B 009C 9C 009D 9D 009E 9E 009F 9F 00A0 A0 00A1 A1 00A2 A2 00A3 A3 00A4 A4 00A5 A5 00A6 A6 00A7 A7 00A8 A8 00A9 A9 00AA AA 00AB AB 00AC AC 00AD AD 00AE AE 00AF AF 00B0 B0 00B1 B1 00B2 B2 00B3 B3 00B4 B4 00B5 B5 00B6 B6 00B7 B7 00B8 B8 00B9 B9 00BA BA 00BB BB 00BC BC 00BD BD 00BE BE 00BF BF 00C0 C0 00C1 C1 00C2 C2 00C3 C3 00C4 C4 00C5 C5 00C6 C6 00C7 C7 00C8 C8 00C9 C9 00CA CA 00CB CB 00CC CC 00CD CD 00CE CE 00CF CF 00D1 D1 00D2 D2 00D3 D3 00D4 D4 00D5 D5 00D6 D6 00D7 D7 00D8 D8 00D9 D9 00DA DA 00DB DB 00DC DC 00DF DF 00E0 E0 00E1 E1 00E2 E2 00E3 E3 00E4 E4 00E5 E5 00E6 E6 00E7 E7 00E8 E8 00E9 E9 00EA EA 00EB EB 00EC EC 00ED ED 00EE EE 00EF EF 00F1 F1 00F2 F2 00F3 F3 00F4 F4 00F5 F5 00F6 F6 00F7 F7 00F8 F8 00F9 F9 00FA FA 00FB FB 00FC FC 00FF FF 011E D0 011F F0 0130 DD 0131 FD 015E DE 015F FE} +} -result {} + +# iso8859-9 - invalid byte sequences +lappend encInvalidBytes {*}{ +}; # iso8859-9 + +# iso8859-9 - invalid byte sequences +lappend encUnencodableStrings {*}{ + iso8859-9 \U000000D0 tcl8 1A -1 {} {} + iso8859-9 \U000000D0 replace 1A -1 {} {} + iso8859-9 \U000000D0 strict {} 0 {} {} + iso8859-9 \U00000400 tcl8 1A -1 {} {} + iso8859-9 \U00000400 replace 1A -1 {} {} + iso8859-9 \U00000400 strict {} 0 {} {} + iso8859-9 \U0000D800 tcl8 1A -1 {} {} + iso8859-9 \U0000D800 replace 1A -1 {} {} + iso8859-9 \U0000D800 strict {} 0 {} {} + iso8859-9 \U0000DC00 tcl8 1A -1 {} {} + iso8859-9 \U0000DC00 replace 1A -1 {} {} + iso8859-9 \U0000DC00 strict {} 0 {} {} + iso8859-9 \U00010000 tcl8 1A -1 {} {} + iso8859-9 \U00010000 replace 1A -1 {} {} + iso8859-9 \U00010000 strict {} 0 {} {} + iso8859-9 \U0010FFFF tcl8 1A -1 {} {} + iso8859-9 \U0010FFFF replace 1A -1 {} {} + iso8859-9 \U0010FFFF strict {} 0 {} {} +}; # iso8859-9 + +# +# iso8859-10 (generated from glibc-ISO_8859_10-2.1.2) + +test encoding-convertfrom-ucmCompare-iso8859-10 {Compare against ICU UCM} -body { + ucmConvertfromMismatches iso8859-10 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0024 24 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007E 7E 007F 7F 0080 80 0081 81 0082 82 0083 83 0084 84 0085 85 0086 86 0087 87 0088 88 0089 89 008A 8A 008B 8B 008C 8C 008D 8D 008E 8E 008F 8F 0090 90 0091 91 0092 92 0093 93 0094 94 0095 95 0096 96 0097 97 0098 98 0099 99 009A 9A 009B 9B 009C 9C 009D 9D 009E 9E 009F 9F 00A0 A0 00A7 A7 00AD AD 00B0 B0 00B7 B7 00C1 C1 00C2 C2 00C3 C3 00C4 C4 00C5 C5 00C6 C6 00C9 C9 00CB CB 00CD CD 00CE CE 00CF CF 00D0 D0 00D3 D3 00D4 D4 00D5 D5 00D6 D6 00D8 D8 00DA DA 00DB DB 00DC DC 00DD DD 00DE DE 00DF DF 00E1 E1 00E2 E2 00E3 E3 00E4 E4 00E5 E5 00E6 E6 00E9 E9 00EB EB 00ED ED 00EE EE 00EF EF 00F0 F0 00F3 F3 00F4 F4 00F5 F5 00F6 F6 00F8 F8 00FA FA 00FB FB 00FC FC 00FD FD 00FE FE 0100 C0 0101 E0 0104 A1 0105 B1 010C C8 010D E8 0110 A9 0111 B9 0112 A2 0113 B2 0116 CC 0117 EC 0118 CA 0119 EA 0122 A3 0123 B3 0128 A5 0129 B5 012A A4 012B B4 012E C7 012F E7 0136 A6 0137 B6 0138 FF 013B A8 013C B8 0145 D1 0146 F1 014A AF 014B BF 014C D2 014D F2 0160 AA 0161 BA 0166 AB 0167 BB 0168 D7 0169 F7 016A AE 016B BE 0172 D9 0173 F9 017D AC 017E BC 2015 BD} +} -result {} + +test encoding-convertto-ucmCompare-iso8859-10 {Compare against ICU UCM} -body { + ucmConverttoMismatches iso8859-10 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0024 24 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007E 7E 007F 7F 0080 80 0081 81 0082 82 0083 83 0084 84 0085 85 0086 86 0087 87 0088 88 0089 89 008A 8A 008B 8B 008C 8C 008D 8D 008E 8E 008F 8F 0090 90 0091 91 0092 92 0093 93 0094 94 0095 95 0096 96 0097 97 0098 98 0099 99 009A 9A 009B 9B 009C 9C 009D 9D 009E 9E 009F 9F 00A0 A0 00A7 A7 00AD AD 00B0 B0 00B7 B7 00C1 C1 00C2 C2 00C3 C3 00C4 C4 00C5 C5 00C6 C6 00C9 C9 00CB CB 00CD CD 00CE CE 00CF CF 00D0 D0 00D3 D3 00D4 D4 00D5 D5 00D6 D6 00D8 D8 00DA DA 00DB DB 00DC DC 00DD DD 00DE DE 00DF DF 00E1 E1 00E2 E2 00E3 E3 00E4 E4 00E5 E5 00E6 E6 00E9 E9 00EB EB 00ED ED 00EE EE 00EF EF 00F0 F0 00F3 F3 00F4 F4 00F5 F5 00F6 F6 00F8 F8 00FA FA 00FB FB 00FC FC 00FD FD 00FE FE 0100 C0 0101 E0 0104 A1 0105 B1 010C C8 010D E8 0110 A9 0111 B9 0112 A2 0113 B2 0116 CC 0117 EC 0118 CA 0119 EA 0122 A3 0123 B3 0128 A5 0129 B5 012A A4 012B B4 012E C7 012F E7 0136 A6 0137 B6 0138 FF 013B A8 013C B8 0145 D1 0146 F1 014A AF 014B BF 014C D2 014D F2 0160 AA 0161 BA 0166 AB 0167 BB 0168 D7 0169 F7 016A AE 016B BE 0172 D9 0173 F9 017D AC 017E BC 2015 BD} +} -result {} + +# iso8859-10 - invalid byte sequences +lappend encInvalidBytes {*}{ +}; # iso8859-10 + +# iso8859-10 - invalid byte sequences +lappend encUnencodableStrings {*}{ + iso8859-10 \U000000A1 tcl8 1A -1 {} {} + iso8859-10 \U000000A1 replace 1A -1 {} {} + iso8859-10 \U000000A1 strict {} 0 {} {} + iso8859-10 \U00000400 tcl8 1A -1 {} {} + iso8859-10 \U00000400 replace 1A -1 {} {} + iso8859-10 \U00000400 strict {} 0 {} {} + iso8859-10 \U0000D800 tcl8 1A -1 {} {} + iso8859-10 \U0000D800 replace 1A -1 {} {} + iso8859-10 \U0000D800 strict {} 0 {} {} + iso8859-10 \U0000DC00 tcl8 1A -1 {} {} + iso8859-10 \U0000DC00 replace 1A -1 {} {} + iso8859-10 \U0000DC00 strict {} 0 {} {} + iso8859-10 \U00010000 tcl8 1A -1 {} {} + iso8859-10 \U00010000 replace 1A -1 {} {} + iso8859-10 \U00010000 strict {} 0 {} {} + iso8859-10 \U0010FFFF tcl8 1A -1 {} {} + iso8859-10 \U0010FFFF replace 1A -1 {} {} + iso8859-10 \U0010FFFF strict {} 0 {} {} +}; # iso8859-10 + +# +# iso8859-11 (generated from glibc-ISO_8859_11-2.1.2) + +test encoding-convertfrom-ucmCompare-iso8859-11 {Compare against ICU UCM} -body { + ucmConvertfromMismatches iso8859-11 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0024 24 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007E 7E 007F 7F 0080 80 0081 81 0082 82 0083 83 0084 84 0085 85 0086 86 0087 87 0088 88 0089 89 008A 8A 008B 8B 008C 8C 008D 8D 008E 8E 008F 8F 0090 90 0091 91 0092 92 0093 93 0094 94 0095 95 0096 96 0097 97 0098 98 0099 99 009A 9A 009B 9B 009C 9C 009D 9D 009E 9E 009F 9F 00A0 A0 0E01 A1 0E02 A2 0E03 A3 0E04 A4 0E05 A5 0E06 A6 0E07 A7 0E08 A8 0E09 A9 0E0A AA 0E0B AB 0E0C AC 0E0D AD 0E0E AE 0E0F AF 0E10 B0 0E11 B1 0E12 B2 0E13 B3 0E14 B4 0E15 B5 0E16 B6 0E17 B7 0E18 B8 0E19 B9 0E1A BA 0E1B BB 0E1C BC 0E1D BD 0E1E BE 0E1F BF 0E20 C0 0E21 C1 0E22 C2 0E23 C3 0E24 C4 0E25 C5 0E26 C6 0E27 C7 0E28 C8 0E29 C9 0E2A CA 0E2B CB 0E2C CC 0E2D CD 0E2E CE 0E2F CF 0E30 D0 0E31 D1 0E32 D2 0E33 D3 0E34 D4 0E35 D5 0E36 D6 0E37 D7 0E38 D8 0E39 D9 0E3A DA 0E3F DF 0E40 E0 0E41 E1 0E42 E2 0E43 E3 0E44 E4 0E45 E5 0E46 E6 0E47 E7 0E48 E8 0E49 E9 0E4A EA 0E4B EB 0E4C EC 0E4D ED 0E4E EE 0E4F EF 0E50 F0 0E51 F1 0E52 F2 0E53 F3 0E54 F4 0E55 F5 0E56 F6 0E57 F7 0E58 F8 0E59 F9 0E5A FA 0E5B FB} +} -result {} + +test encoding-convertto-ucmCompare-iso8859-11 {Compare against ICU UCM} -body { + ucmConverttoMismatches iso8859-11 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0024 24 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007E 7E 007F 7F 0080 80 0081 81 0082 82 0083 83 0084 84 0085 85 0086 86 0087 87 0088 88 0089 89 008A 8A 008B 8B 008C 8C 008D 8D 008E 8E 008F 8F 0090 90 0091 91 0092 92 0093 93 0094 94 0095 95 0096 96 0097 97 0098 98 0099 99 009A 9A 009B 9B 009C 9C 009D 9D 009E 9E 009F 9F 00A0 A0 0E01 A1 0E02 A2 0E03 A3 0E04 A4 0E05 A5 0E06 A6 0E07 A7 0E08 A8 0E09 A9 0E0A AA 0E0B AB 0E0C AC 0E0D AD 0E0E AE 0E0F AF 0E10 B0 0E11 B1 0E12 B2 0E13 B3 0E14 B4 0E15 B5 0E16 B6 0E17 B7 0E18 B8 0E19 B9 0E1A BA 0E1B BB 0E1C BC 0E1D BD 0E1E BE 0E1F BF 0E20 C0 0E21 C1 0E22 C2 0E23 C3 0E24 C4 0E25 C5 0E26 C6 0E27 C7 0E28 C8 0E29 C9 0E2A CA 0E2B CB 0E2C CC 0E2D CD 0E2E CE 0E2F CF 0E30 D0 0E31 D1 0E32 D2 0E33 D3 0E34 D4 0E35 D5 0E36 D6 0E37 D7 0E38 D8 0E39 D9 0E3A DA 0E3F DF 0E40 E0 0E41 E1 0E42 E2 0E43 E3 0E44 E4 0E45 E5 0E46 E6 0E47 E7 0E48 E8 0E49 E9 0E4A EA 0E4B EB 0E4C EC 0E4D ED 0E4E EE 0E4F EF 0E50 F0 0E51 F1 0E52 F2 0E53 F3 0E54 F4 0E55 F5 0E56 F6 0E57 F7 0E58 F8 0E59 F9 0E5A FA 0E5B FB} +} -result {} + +# iso8859-11 - invalid byte sequences +lappend encInvalidBytes {*}{ + iso8859-11 DB tcl8 \U000000DB -1 {} {} + iso8859-11 DB replace \uFFFD -1 {} {} + iso8859-11 DB strict {} 0 {} {} + iso8859-11 DC tcl8 \U000000DC -1 {} {} + iso8859-11 DC replace \uFFFD -1 {} {} + iso8859-11 DC strict {} 0 {} {} + iso8859-11 DD tcl8 \U000000DD -1 {} {} + iso8859-11 DD replace \uFFFD -1 {} {} + iso8859-11 DD strict {} 0 {} {} + iso8859-11 DE tcl8 \U000000DE -1 {} {} + iso8859-11 DE replace \uFFFD -1 {} {} + iso8859-11 DE strict {} 0 {} {} + iso8859-11 FC tcl8 \U000000FC -1 {} {} + iso8859-11 FC replace \uFFFD -1 {} {} + iso8859-11 FC strict {} 0 {} {} + iso8859-11 FD tcl8 \U000000FD -1 {} {} + iso8859-11 FD replace \uFFFD -1 {} {} + iso8859-11 FD strict {} 0 {} {} + iso8859-11 FE tcl8 \U000000FE -1 {} {} + iso8859-11 FE replace \uFFFD -1 {} {} + iso8859-11 FE strict {} 0 {} {} + iso8859-11 FF tcl8 \U000000FF -1 {} {} + iso8859-11 FF replace \uFFFD -1 {} {} + iso8859-11 FF strict {} 0 {} {} +}; # iso8859-11 + +# iso8859-11 - invalid byte sequences +lappend encUnencodableStrings {*}{ + iso8859-11 \U000000A1 tcl8 1A -1 {} {} + iso8859-11 \U000000A1 replace 1A -1 {} {} + iso8859-11 \U000000A1 strict {} 0 {} {} + iso8859-11 \U00000400 tcl8 1A -1 {} {} + iso8859-11 \U00000400 replace 1A -1 {} {} + iso8859-11 \U00000400 strict {} 0 {} {} + iso8859-11 \U0000D800 tcl8 1A -1 {} {} + iso8859-11 \U0000D800 replace 1A -1 {} {} + iso8859-11 \U0000D800 strict {} 0 {} {} + iso8859-11 \U0000DC00 tcl8 1A -1 {} {} + iso8859-11 \U0000DC00 replace 1A -1 {} {} + iso8859-11 \U0000DC00 strict {} 0 {} {} + iso8859-11 \U00010000 tcl8 1A -1 {} {} + iso8859-11 \U00010000 replace 1A -1 {} {} + iso8859-11 \U00010000 strict {} 0 {} {} + iso8859-11 \U0010FFFF tcl8 1A -1 {} {} + iso8859-11 \U0010FFFF replace 1A -1 {} {} + iso8859-11 \U0010FFFF strict {} 0 {} {} +}; # iso8859-11 + +# +# iso8859-13 (generated from glibc-ISO_8859_13-2.3.3) + +test encoding-convertfrom-ucmCompare-iso8859-13 {Compare against ICU UCM} -body { + ucmConvertfromMismatches iso8859-13 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0024 24 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007E 7E 007F 7F 0080 80 0081 81 0082 82 0083 83 0084 84 0085 85 0086 86 0087 87 0088 88 0089 89 008A 8A 008B 8B 008C 8C 008D 8D 008E 8E 008F 8F 0090 90 0091 91 0092 92 0093 93 0094 94 0095 95 0096 96 0097 97 0098 98 0099 99 009A 9A 009B 9B 009C 9C 009D 9D 009E 9E 009F 9F 00A0 A0 00A2 A2 00A3 A3 00A4 A4 00A6 A6 00A7 A7 00A9 A9 00AB AB 00AC AC 00AD AD 00AE AE 00B0 B0 00B1 B1 00B2 B2 00B3 B3 00B5 B5 00B6 B6 00B7 B7 00B9 B9 00BB BB 00BC BC 00BD BD 00BE BE 00C4 C4 00C5 C5 00C6 AF 00C9 C9 00D3 D3 00D5 D5 00D6 D6 00D7 D7 00D8 A8 00DC DC 00DF DF 00E4 E4 00E5 E5 00E6 BF 00E9 E9 00F3 F3 00F5 F5 00F6 F6 00F7 F7 00F8 B8 00FC FC 0100 C2 0101 E2 0104 C0 0105 E0 0106 C3 0107 E3 010C C8 010D E8 0112 C7 0113 E7 0116 CB 0117 EB 0118 C6 0119 E6 0122 CC 0123 EC 012A CE 012B EE 012E C1 012F E1 0136 CD 0137 ED 013B CF 013C EF 0141 D9 0142 F9 0143 D1 0144 F1 0145 D2 0146 F2 014C D4 014D F4 0156 AA 0157 BA 015A DA 015B FA 0160 D0 0161 F0 016A DB 016B FB 0172 D8 0173 F8 0179 CA 017A EA 017B DD 017C FD 017D DE 017E FE 2019 FF 201C B4 201D A1 201E A5} +} -result {} + +test encoding-convertto-ucmCompare-iso8859-13 {Compare against ICU UCM} -body { + ucmConverttoMismatches iso8859-13 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0024 24 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007E 7E 007F 7F 0080 80 0081 81 0082 82 0083 83 0084 84 0085 85 0086 86 0087 87 0088 88 0089 89 008A 8A 008B 8B 008C 8C 008D 8D 008E 8E 008F 8F 0090 90 0091 91 0092 92 0093 93 0094 94 0095 95 0096 96 0097 97 0098 98 0099 99 009A 9A 009B 9B 009C 9C 009D 9D 009E 9E 009F 9F 00A0 A0 00A2 A2 00A3 A3 00A4 A4 00A6 A6 00A7 A7 00A9 A9 00AB AB 00AC AC 00AD AD 00AE AE 00B0 B0 00B1 B1 00B2 B2 00B3 B3 00B5 B5 00B6 B6 00B7 B7 00B9 B9 00BB BB 00BC BC 00BD BD 00BE BE 00C4 C4 00C5 C5 00C6 AF 00C9 C9 00D3 D3 00D5 D5 00D6 D6 00D7 D7 00D8 A8 00DC DC 00DF DF 00E4 E4 00E5 E5 00E6 BF 00E9 E9 00F3 F3 00F5 F5 00F6 F6 00F7 F7 00F8 B8 00FC FC 0100 C2 0101 E2 0104 C0 0105 E0 0106 C3 0107 E3 010C C8 010D E8 0112 C7 0113 E7 0116 CB 0117 EB 0118 C6 0119 E6 0122 CC 0123 EC 012A CE 012B EE 012E C1 012F E1 0136 CD 0137 ED 013B CF 013C EF 0141 D9 0142 F9 0143 D1 0144 F1 0145 D2 0146 F2 014C D4 014D F4 0156 AA 0157 BA 015A DA 015B FA 0160 D0 0161 F0 016A DB 016B FB 0172 D8 0173 F8 0179 CA 017A EA 017B DD 017C FD 017D DE 017E FE 2019 FF 201C B4 201D A1 201E A5} +} -result {} + +# iso8859-13 - invalid byte sequences +lappend encInvalidBytes {*}{ +}; # iso8859-13 + +# iso8859-13 - invalid byte sequences +lappend encUnencodableStrings {*}{ + iso8859-13 \U000000A1 tcl8 1A -1 {} {} + iso8859-13 \U000000A1 replace 1A -1 {} {} + iso8859-13 \U000000A1 strict {} 0 {} {} + iso8859-13 \U00000400 tcl8 1A -1 {} {} + iso8859-13 \U00000400 replace 1A -1 {} {} + iso8859-13 \U00000400 strict {} 0 {} {} + iso8859-13 \U0000D800 tcl8 1A -1 {} {} + iso8859-13 \U0000D800 replace 1A -1 {} {} + iso8859-13 \U0000D800 strict {} 0 {} {} + iso8859-13 \U0000DC00 tcl8 1A -1 {} {} + iso8859-13 \U0000DC00 replace 1A -1 {} {} + iso8859-13 \U0000DC00 strict {} 0 {} {} + iso8859-13 \U00010000 tcl8 1A -1 {} {} + iso8859-13 \U00010000 replace 1A -1 {} {} + iso8859-13 \U00010000 strict {} 0 {} {} + iso8859-13 \U0010FFFF tcl8 1A -1 {} {} + iso8859-13 \U0010FFFF replace 1A -1 {} {} + iso8859-13 \U0010FFFF strict {} 0 {} {} +}; # iso8859-13 + +# +# iso8859-14 (generated from glibc-ISO_8859_14-2.1.2) + +test encoding-convertfrom-ucmCompare-iso8859-14 {Compare against ICU UCM} -body { + ucmConvertfromMismatches iso8859-14 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0024 24 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007E 7E 007F 7F 0080 80 0081 81 0082 82 0083 83 0084 84 0085 85 0086 86 0087 87 0088 88 0089 89 008A 8A 008B 8B 008C 8C 008D 8D 008E 8E 008F 8F 0090 90 0091 91 0092 92 0093 93 0094 94 0095 95 0096 96 0097 97 0098 98 0099 99 009A 9A 009B 9B 009C 9C 009D 9D 009E 9E 009F 9F 00A0 A0 00A3 A3 00A7 A7 00A9 A9 00AD AD 00AE AE 00B6 B6 00C0 C0 00C1 C1 00C2 C2 00C3 C3 00C4 C4 00C5 C5 00C6 C6 00C7 C7 00C8 C8 00C9 C9 00CA CA 00CB CB 00CC CC 00CD CD 00CE CE 00CF CF 00D1 D1 00D2 D2 00D3 D3 00D4 D4 00D5 D5 00D6 D6 00D8 D8 00D9 D9 00DA DA 00DB DB 00DC DC 00DD DD 00DF DF 00E0 E0 00E1 E1 00E2 E2 00E3 E3 00E4 E4 00E5 E5 00E6 E6 00E7 E7 00E8 E8 00E9 E9 00EA EA 00EB EB 00EC EC 00ED ED 00EE EE 00EF EF 00F1 F1 00F2 F2 00F3 F3 00F4 F4 00F5 F5 00F6 F6 00F8 F8 00F9 F9 00FA FA 00FB FB 00FC FC 00FD FD 00FF FF 010A A4 010B A5 0120 B2 0121 B3 0174 D0 0175 F0 0176 DE 0177 FE 0178 AF 1E02 A1 1E03 A2 1E0A A6 1E0B AB 1E1E B0 1E1F B1 1E40 B4 1E41 B5 1E56 B7 1E57 B9 1E60 BB 1E61 BF 1E6A D7 1E6B F7 1E80 A8 1E81 B8 1E82 AA 1E83 BA 1E84 BD 1E85 BE 1EF2 AC 1EF3 BC} +} -result {} + +test encoding-convertto-ucmCompare-iso8859-14 {Compare against ICU UCM} -body { + ucmConverttoMismatches iso8859-14 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0024 24 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007E 7E 007F 7F 0080 80 0081 81 0082 82 0083 83 0084 84 0085 85 0086 86 0087 87 0088 88 0089 89 008A 8A 008B 8B 008C 8C 008D 8D 008E 8E 008F 8F 0090 90 0091 91 0092 92 0093 93 0094 94 0095 95 0096 96 0097 97 0098 98 0099 99 009A 9A 009B 9B 009C 9C 009D 9D 009E 9E 009F 9F 00A0 A0 00A3 A3 00A7 A7 00A9 A9 00AD AD 00AE AE 00B6 B6 00C0 C0 00C1 C1 00C2 C2 00C3 C3 00C4 C4 00C5 C5 00C6 C6 00C7 C7 00C8 C8 00C9 C9 00CA CA 00CB CB 00CC CC 00CD CD 00CE CE 00CF CF 00D1 D1 00D2 D2 00D3 D3 00D4 D4 00D5 D5 00D6 D6 00D8 D8 00D9 D9 00DA DA 00DB DB 00DC DC 00DD DD 00DF DF 00E0 E0 00E1 E1 00E2 E2 00E3 E3 00E4 E4 00E5 E5 00E6 E6 00E7 E7 00E8 E8 00E9 E9 00EA EA 00EB EB 00EC EC 00ED ED 00EE EE 00EF EF 00F1 F1 00F2 F2 00F3 F3 00F4 F4 00F5 F5 00F6 F6 00F8 F8 00F9 F9 00FA FA 00FB FB 00FC FC 00FD FD 00FF FF 010A A4 010B A5 0120 B2 0121 B3 0174 D0 0175 F0 0176 DE 0177 FE 0178 AF 1E02 A1 1E03 A2 1E0A A6 1E0B AB 1E1E B0 1E1F B1 1E40 B4 1E41 B5 1E56 B7 1E57 B9 1E60 BB 1E61 BF 1E6A D7 1E6B F7 1E80 A8 1E81 B8 1E82 AA 1E83 BA 1E84 BD 1E85 BE 1EF2 AC 1EF3 BC} +} -result {} + +# iso8859-14 - invalid byte sequences +lappend encInvalidBytes {*}{ +}; # iso8859-14 + +# iso8859-14 - invalid byte sequences +lappend encUnencodableStrings {*}{ + iso8859-14 \U000000A1 tcl8 1A -1 {} {} + iso8859-14 \U000000A1 replace 1A -1 {} {} + iso8859-14 \U000000A1 strict {} 0 {} {} + iso8859-14 \U00000400 tcl8 1A -1 {} {} + iso8859-14 \U00000400 replace 1A -1 {} {} + iso8859-14 \U00000400 strict {} 0 {} {} + iso8859-14 \U0000D800 tcl8 1A -1 {} {} + iso8859-14 \U0000D800 replace 1A -1 {} {} + iso8859-14 \U0000D800 strict {} 0 {} {} + iso8859-14 \U0000DC00 tcl8 1A -1 {} {} + iso8859-14 \U0000DC00 replace 1A -1 {} {} + iso8859-14 \U0000DC00 strict {} 0 {} {} + iso8859-14 \U00010000 tcl8 1A -1 {} {} + iso8859-14 \U00010000 replace 1A -1 {} {} + iso8859-14 \U00010000 strict {} 0 {} {} + iso8859-14 \U0010FFFF tcl8 1A -1 {} {} + iso8859-14 \U0010FFFF replace 1A -1 {} {} + iso8859-14 \U0010FFFF strict {} 0 {} {} +}; # iso8859-14 + +# +# iso8859-15 (generated from glibc-ISO_8859_15-2.1.2) + +test encoding-convertfrom-ucmCompare-iso8859-15 {Compare against ICU UCM} -body { + ucmConvertfromMismatches iso8859-15 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0024 24 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007E 7E 007F 7F 0080 80 0081 81 0082 82 0083 83 0084 84 0085 85 0086 86 0087 87 0088 88 0089 89 008A 8A 008B 8B 008C 8C 008D 8D 008E 8E 008F 8F 0090 90 0091 91 0092 92 0093 93 0094 94 0095 95 0096 96 0097 97 0098 98 0099 99 009A 9A 009B 9B 009C 9C 009D 9D 009E 9E 009F 9F 00A0 A0 00A1 A1 00A2 A2 00A3 A3 00A5 A5 00A7 A7 00A9 A9 00AA AA 00AB AB 00AC AC 00AD AD 00AE AE 00AF AF 00B0 B0 00B1 B1 00B2 B2 00B3 B3 00B5 B5 00B6 B6 00B7 B7 00B9 B9 00BA BA 00BB BB 00BF BF 00C0 C0 00C1 C1 00C2 C2 00C3 C3 00C4 C4 00C5 C5 00C6 C6 00C7 C7 00C8 C8 00C9 C9 00CA CA 00CB CB 00CC CC 00CD CD 00CE CE 00CF CF 00D0 D0 00D1 D1 00D2 D2 00D3 D3 00D4 D4 00D5 D5 00D6 D6 00D7 D7 00D8 D8 00D9 D9 00DA DA 00DB DB 00DC DC 00DD DD 00DE DE 00DF DF 00E0 E0 00E1 E1 00E2 E2 00E3 E3 00E4 E4 00E5 E5 00E6 E6 00E7 E7 00E8 E8 00E9 E9 00EA EA 00EB EB 00EC EC 00ED ED 00EE EE 00EF EF 00F0 F0 00F1 F1 00F2 F2 00F3 F3 00F4 F4 00F5 F5 00F6 F6 00F7 F7 00F8 F8 00F9 F9 00FA FA 00FB FB 00FC FC 00FD FD 00FE FE 00FF FF 0152 BC 0153 BD 0160 A6 0161 A8 0178 BE 017D B4 017E B8 20AC A4} +} -result {} + +test encoding-convertto-ucmCompare-iso8859-15 {Compare against ICU UCM} -body { + ucmConverttoMismatches iso8859-15 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0024 24 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007E 7E 007F 7F 0080 80 0081 81 0082 82 0083 83 0084 84 0085 85 0086 86 0087 87 0088 88 0089 89 008A 8A 008B 8B 008C 8C 008D 8D 008E 8E 008F 8F 0090 90 0091 91 0092 92 0093 93 0094 94 0095 95 0096 96 0097 97 0098 98 0099 99 009A 9A 009B 9B 009C 9C 009D 9D 009E 9E 009F 9F 00A0 A0 00A1 A1 00A2 A2 00A3 A3 00A5 A5 00A7 A7 00A9 A9 00AA AA 00AB AB 00AC AC 00AD AD 00AE AE 00AF AF 00B0 B0 00B1 B1 00B2 B2 00B3 B3 00B5 B5 00B6 B6 00B7 B7 00B9 B9 00BA BA 00BB BB 00BF BF 00C0 C0 00C1 C1 00C2 C2 00C3 C3 00C4 C4 00C5 C5 00C6 C6 00C7 C7 00C8 C8 00C9 C9 00CA CA 00CB CB 00CC CC 00CD CD 00CE CE 00CF CF 00D0 D0 00D1 D1 00D2 D2 00D3 D3 00D4 D4 00D5 D5 00D6 D6 00D7 D7 00D8 D8 00D9 D9 00DA DA 00DB DB 00DC DC 00DD DD 00DE DE 00DF DF 00E0 E0 00E1 E1 00E2 E2 00E3 E3 00E4 E4 00E5 E5 00E6 E6 00E7 E7 00E8 E8 00E9 E9 00EA EA 00EB EB 00EC EC 00ED ED 00EE EE 00EF EF 00F0 F0 00F1 F1 00F2 F2 00F3 F3 00F4 F4 00F5 F5 00F6 F6 00F7 F7 00F8 F8 00F9 F9 00FA FA 00FB FB 00FC FC 00FD FD 00FE FE 00FF FF 0152 BC 0153 BD 0160 A6 0161 A8 0178 BE 017D B4 017E B8 20AC A4} +} -result {} + +# iso8859-15 - invalid byte sequences +lappend encInvalidBytes {*}{ +}; # iso8859-15 + +# iso8859-15 - invalid byte sequences +lappend encUnencodableStrings {*}{ + iso8859-15 \U000000A4 tcl8 1A -1 {} {} + iso8859-15 \U000000A4 replace 1A -1 {} {} + iso8859-15 \U000000A4 strict {} 0 {} {} + iso8859-15 \U00000400 tcl8 1A -1 {} {} + iso8859-15 \U00000400 replace 1A -1 {} {} + iso8859-15 \U00000400 strict {} 0 {} {} + iso8859-15 \U0000D800 tcl8 1A -1 {} {} + iso8859-15 \U0000D800 replace 1A -1 {} {} + iso8859-15 \U0000D800 strict {} 0 {} {} + iso8859-15 \U0000DC00 tcl8 1A -1 {} {} + iso8859-15 \U0000DC00 replace 1A -1 {} {} + iso8859-15 \U0000DC00 strict {} 0 {} {} + iso8859-15 \U00010000 tcl8 1A -1 {} {} + iso8859-15 \U00010000 replace 1A -1 {} {} + iso8859-15 \U00010000 strict {} 0 {} {} + iso8859-15 \U0010FFFF tcl8 1A -1 {} {} + iso8859-15 \U0010FFFF replace 1A -1 {} {} + iso8859-15 \U0010FFFF strict {} 0 {} {} +}; # iso8859-15 + +# +# iso8859-16 (generated from glibc-ISO_8859_16-2.3.3) + +test encoding-convertfrom-ucmCompare-iso8859-16 {Compare against ICU UCM} -body { + ucmConvertfromMismatches iso8859-16 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0024 24 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007E 7E 007F 7F 0080 80 0081 81 0082 82 0083 83 0084 84 0085 85 0086 86 0087 87 0088 88 0089 89 008A 8A 008B 8B 008C 8C 008D 8D 008E 8E 008F 8F 0090 90 0091 91 0092 92 0093 93 0094 94 0095 95 0096 96 0097 97 0098 98 0099 99 009A 9A 009B 9B 009C 9C 009D 9D 009E 9E 009F 9F 00A0 A0 00A7 A7 00A9 A9 00AB AB 00AD AD 00B0 B0 00B1 B1 00B6 B6 00B7 B7 00BB BB 00C0 C0 00C1 C1 00C2 C2 00C4 C4 00C6 C6 00C7 C7 00C8 C8 00C9 C9 00CA CA 00CB CB 00CC CC 00CD CD 00CE CE 00CF CF 00D2 D2 00D3 D3 00D4 D4 00D6 D6 00D9 D9 00DA DA 00DB DB 00DC DC 00DF DF 00E0 E0 00E1 E1 00E2 E2 00E4 E4 00E6 E6 00E7 E7 00E8 E8 00E9 E9 00EA EA 00EB EB 00EC EC 00ED ED 00EE EE 00EF EF 00F2 F2 00F3 F3 00F4 F4 00F6 F6 00F9 F9 00FA FA 00FB FB 00FC FC 00FF FF 0102 C3 0103 E3 0104 A1 0105 A2 0106 C5 0107 E5 010C B2 010D B9 0110 D0 0111 F0 0118 DD 0119 FD 0141 A3 0142 B3 0143 D1 0144 F1 0150 D5 0151 F5 0152 BC 0153 BD 015A D7 015B F7 0160 A6 0161 A8 0170 D8 0171 F8 0178 BE 0179 AC 017A AE 017B AF 017C BF 017D B4 017E B8 0218 AA 0219 BA 021A DE 021B FE 201D B5 201E A5 20AC A4} +} -result {} + +test encoding-convertto-ucmCompare-iso8859-16 {Compare against ICU UCM} -body { + ucmConverttoMismatches iso8859-16 {0000 00 0001 01 0002 02 0003 03 0004 04 0005 05 0006 06 0007 07 0008 08 0009 09 000A 0A 000B 0B 000C 0C 000D 0D 000E 0E 000F 0F 0010 10 0011 11 0012 12 0013 13 0014 14 0015 15 0016 16 0017 17 0018 18 0019 19 001A 1A 001B 1B 001C 1C 001D 1D 001E 1E 001F 1F 0020 20 0021 21 0022 22 0023 23 0024 24 0025 25 0026 26 0027 27 0028 28 0029 29 002A 2A 002B 2B 002C 2C 002D 2D 002E 2E 002F 2F 0030 30 0031 31 0032 32 0033 33 0034 34 0035 35 0036 36 0037 37 0038 38 0039 39 003A 3A 003B 3B 003C 3C 003D 3D 003E 3E 003F 3F 0040 40 0041 41 0042 42 0043 43 0044 44 0045 45 0046 46 0047 47 0048 48 0049 49 004A 4A 004B 4B 004C 4C 004D 4D 004E 4E 004F 4F 0050 50 0051 51 0052 52 0053 53 0054 54 0055 55 0056 56 0057 57 0058 58 0059 59 005A 5A 005B 5B 005C 5C 005D 5D 005E 5E 005F 5F 0060 60 0061 61 0062 62 0063 63 0064 64 0065 65 0066 66 0067 67 0068 68 0069 69 006A 6A 006B 6B 006C 6C 006D 6D 006E 6E 006F 6F 0070 70 0071 71 0072 72 0073 73 0074 74 0075 75 0076 76 0077 77 0078 78 0079 79 007A 7A 007B 7B 007C 7C 007D 7D 007E 7E 007F 7F 0080 80 0081 81 0082 82 0083 83 0084 84 0085 85 0086 86 0087 87 0088 88 0089 89 008A 8A 008B 8B 008C 8C 008D 8D 008E 8E 008F 8F 0090 90 0091 91 0092 92 0093 93 0094 94 0095 95 0096 96 0097 97 0098 98 0099 99 009A 9A 009B 9B 009C 9C 009D 9D 009E 9E 009F 9F 00A0 A0 00A7 A7 00A9 A9 00AB AB 00AD AD 00B0 B0 00B1 B1 00B6 B6 00B7 B7 00BB BB 00C0 C0 00C1 C1 00C2 C2 00C4 C4 00C6 C6 00C7 C7 00C8 C8 00C9 C9 00CA CA 00CB CB 00CC CC 00CD CD 00CE CE 00CF CF 00D2 D2 00D3 D3 00D4 D4 00D6 D6 00D9 D9 00DA DA 00DB DB 00DC DC 00DF DF 00E0 E0 00E1 E1 00E2 E2 00E4 E4 00E6 E6 00E7 E7 00E8 E8 00E9 E9 00EA EA 00EB EB 00EC EC 00ED ED 00EE EE 00EF EF 00F2 F2 00F3 F3 00F4 F4 00F6 F6 00F9 F9 00FA FA 00FB FB 00FC FC 00FF FF 0102 C3 0103 E3 0104 A1 0105 A2 0106 C5 0107 E5 010C B2 010D B9 0110 D0 0111 F0 0118 DD 0119 FD 0141 A3 0142 B3 0143 D1 0144 F1 0150 D5 0151 F5 0152 BC 0153 BD 015A D7 015B F7 0160 A6 0161 A8 0170 D8 0171 F8 0178 BE 0179 AC 017A AE 017B AF 017C BF 017D B4 017E B8 0218 AA 0219 BA 021A DE 021B FE 201D B5 201E A5 20AC A4} +} -result {} + +# iso8859-16 - invalid byte sequences +lappend encInvalidBytes {*}{ +}; # iso8859-16 + +# iso8859-16 - invalid byte sequences +lappend encUnencodableStrings {*}{ + iso8859-16 \U000000A1 tcl8 1A -1 {} {} + iso8859-16 \U000000A1 replace 1A -1 {} {} + iso8859-16 \U000000A1 strict {} 0 {} {} + iso8859-16 \U00000400 tcl8 1A -1 {} {} + iso8859-16 \U00000400 replace 1A -1 {} {} + iso8859-16 \U00000400 strict {} 0 {} {} + iso8859-16 \U0000D800 tcl8 1A -1 {} {} + iso8859-16 \U0000D800 replace 1A -1 {} {} + iso8859-16 \U0000D800 strict {} 0 {} {} + iso8859-16 \U0000DC00 tcl8 1A -1 {} {} + iso8859-16 \U0000DC00 replace 1A -1 {} {} + iso8859-16 \U0000DC00 strict {} 0 {} {} + iso8859-16 \U00010000 tcl8 1A -1 {} {} + iso8859-16 \U00010000 replace 1A -1 {} {} + iso8859-16 \U00010000 strict {} 0 {} {} + iso8859-16 \U0010FFFF tcl8 1A -1 {} {} + iso8859-16 \U0010FFFF replace 1A -1 {} {} + iso8859-16 \U0010FFFF strict {} 0 {} {} +}; # iso8859-16 -- cgit v0.12 From ad8dae7587f0afd369d467a051b23a68cc3703ac Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Thu, 2 Mar 2023 06:29:10 +0000 Subject: Bug [e778e3f804]. Fix error message for invalid profile name. --- generic/tclEncoding.c | 28 +++++++++++++++++++--------- tests/encoding.test | 8 ++++++++ tests/ioCmd.test | 4 ++++ 3 files changed, 31 insertions(+), 9 deletions(-) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 264ca96..3842f2f 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -188,15 +188,16 @@ static Tcl_Encoding systemEncoding = NULL; Tcl_Encoding tclIdentityEncoding = NULL; /* - * Names of encoding profiles and corresponding integer values + * Names of encoding profiles and corresponding integer values. + * Keep alphabetical order for error messages. */ static struct TclEncodingProfiles { const char *name; int value; } encodingProfiles[] = { - {"tcl8", TCL_ENCODING_PROFILE_TCL8}, - {"strict", TCL_ENCODING_PROFILE_STRICT}, {"replace", TCL_ENCODING_PROFILE_REPLACE}, + {"strict", TCL_ENCODING_PROFILE_STRICT}, + {"tcl8", TCL_ENCODING_PROFILE_TCL8}, }; #define PROFILE_STRICT(flags_) \ ((TCL_ENCODING_PROFILE_GET(flags_) == TCL_ENCODING_PROFILE_STRICT) \ @@ -4395,19 +4396,28 @@ TclEncodingProfileNameToId( int *profilePtr) /* Output */ { size_t i; + size_t numProfiles = sizeof(encodingProfiles) / sizeof(encodingProfiles[0]); - for (i = 0; i < sizeof(encodingProfiles) / sizeof(encodingProfiles[0]); ++i) { + for (i = 0; i < numProfiles; ++i) { if (!strcmp(profileName, encodingProfiles[i].name)) { *profilePtr = encodingProfiles[i].value; return TCL_OK; } } if (interp) { - Tcl_SetObjResult( - interp, - Tcl_ObjPrintf( - "bad profile \"%s\". Must be \"tcl8\" or \"strict\".", - profileName)); + Tcl_Obj *errorObj; + /* This code assumes at least two profiles :-) */ + errorObj = + Tcl_ObjPrintf("bad profile name \"%s\": must be", + profileName); + for (i = 0; i < (numProfiles - 1); ++i) { + Tcl_AppendStringsToObj( + errorObj, " ", encodingProfiles[i].name, ",", NULL); + } + Tcl_AppendStringsToObj( + errorObj, " or ", encodingProfiles[numProfiles-1].name, NULL); + + Tcl_SetObjResult(interp, errorObj); Tcl_SetErrorCode( interp, "TCL", "ENCODING", "PROFILE", profileName, NULL); } diff --git a/tests/encoding.test b/tests/encoding.test index 215b5c8..8044c8c 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -106,6 +106,14 @@ test encoding-3.2 {Tcl_GetEncodingName, non-null} -setup { } -cleanup { fconfigure stdout -encoding $old } -result {jis0208} +test encoding-3.3 {fconfigure -encodingprofile} -setup { + set old [fconfigure stdout -encodingprofile] +} -body { + fconfigure stdout -encodingprofile replace + fconfigure stdout -encodingprofile +} -cleanup { + fconfigure stdout -encodingprofile $old +} -result replace test encoding-4.1 {Tcl_GetEncodingNames} -constraints {testencoding} -setup { cd [makeDirectory tmp] diff --git a/tests/ioCmd.test b/tests/ioCmd.test index 9e28569..a1ec571 100644 --- a/tests/ioCmd.test +++ b/tests/ioCmd.test @@ -369,6 +369,10 @@ test iocmd-8.20 {fconfigure command / win console channel} -constraints {nonPort # TODO: Test parsing of serial channel options (nonPortable, since requires an # open channel to work with). +test iocmd-8.21 {fconfigure -encodingprofile badprofile} -body { + fconfigure stdin -encodingprofile froboz +} -returnCodes error -result {bad profile name "froboz": must be replace, strict, or tcl8} + test iocmd-9.1 {eof command} { list [catch {eof} msg] $msg $::errorCode } {1 {wrong # args: should be "eof channelId"} {TCL WRONGARGS}} -- cgit v0.12 From 44fdf09f7bf8a1e0ae30d1eaea83d5cd1d2fdca2 Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Thu, 2 Mar 2023 06:41:39 +0000 Subject: Bug [e778e3f804]. Fix error message for invalid profile name. --- generic/tclEncoding.c | 28 +++++++++++++++++++--------- tests/encoding.test | 8 ++++++++ tests/io.test | 2 +- tests/ioCmd.test | 4 ++++ 4 files changed, 32 insertions(+), 10 deletions(-) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 1d336f5..b32db7c 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -188,15 +188,16 @@ static Tcl_Encoding systemEncoding = NULL; Tcl_Encoding tclIdentityEncoding = NULL; /* - * Names of encoding profiles and corresponding integer values + * Names of encoding profiles and corresponding integer values. + * Keep alphabetical order for error messages. */ static struct TclEncodingProfiles { const char *name; int value; } encodingProfiles[] = { - {"tcl8", TCL_ENCODING_PROFILE_TCL8}, - {"strict", TCL_ENCODING_PROFILE_STRICT}, {"replace", TCL_ENCODING_PROFILE_REPLACE}, + {"strict", TCL_ENCODING_PROFILE_STRICT}, + {"tcl8", TCL_ENCODING_PROFILE_TCL8}, }; #define PROFILE_STRICT(flags_) \ ((TCL_ENCODING_PROFILE_GET(flags_) == TCL_ENCODING_PROFILE_STRICT) \ @@ -4418,19 +4419,28 @@ TclEncodingProfileNameToId( int *profilePtr) /* Output */ { size_t i; + size_t numProfiles = sizeof(encodingProfiles) / sizeof(encodingProfiles[0]); - for (i = 0; i < sizeof(encodingProfiles) / sizeof(encodingProfiles[0]); ++i) { + for (i = 0; i < numProfiles; ++i) { if (!strcmp(profileName, encodingProfiles[i].name)) { *profilePtr = encodingProfiles[i].value; return TCL_OK; } } if (interp) { - Tcl_SetObjResult( - interp, - Tcl_ObjPrintf( - "bad profile \"%s\". Must be \"tcl8\" or \"strict\".", - profileName)); + Tcl_Obj *errorObj; + /* This code assumes at least two profiles :-) */ + errorObj = + Tcl_ObjPrintf("bad profile name \"%s\": must be", + profileName); + for (i = 0; i < (numProfiles - 1); ++i) { + Tcl_AppendStringsToObj( + errorObj, " ", encodingProfiles[i].name, ",", NULL); + } + Tcl_AppendStringsToObj( + errorObj, " or ", encodingProfiles[numProfiles-1].name, NULL); + + Tcl_SetObjResult(interp, errorObj); Tcl_SetErrorCode( interp, "TCL", "ENCODING", "PROFILE", profileName, NULL); } diff --git a/tests/encoding.test b/tests/encoding.test index 800d93b..a51b6c0 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -105,6 +105,14 @@ test encoding-3.2 {Tcl_GetEncodingName, non-null} -setup { } -cleanup { fconfigure stdout -encoding $old } -result {jis0208} +test encoding-3.3 {fconfigure -encodingprofile} -setup { + set old [fconfigure stdout -encodingprofile] +} -body { + fconfigure stdout -encodingprofile replace + fconfigure stdout -encodingprofile +} -cleanup { + fconfigure stdout -encodingprofile $old +} -result replace test encoding-4.1 {Tcl_GetEncodingNames} -constraints {testencoding} -setup { cd [makeDirectory tmp] diff --git a/tests/io.test b/tests/io.test index 66dee7d..836a9b8 100644 --- a/tests/io.test +++ b/tests/io.test @@ -7622,7 +7622,7 @@ test io-52.20 {TclCopyChannel & encodings} -setup { set out [open $path(kyrillic.txt) w] # Using "-encoding ascii" means reading the "Á" gives an error - fconfigure $in -encoding ascii -strictencoding 1 + fconfigure $in -encoding ascii -encodingprofile strict fconfigure $out -encoding koi8-r -translation lf fcopy $in $out diff --git a/tests/ioCmd.test b/tests/ioCmd.test index 8c9d870..23cd67e 100644 --- a/tests/ioCmd.test +++ b/tests/ioCmd.test @@ -390,6 +390,10 @@ test iocmd-8.22 {fconfigure command / -nocomplainencoding 0, no error if -strict } -result 0 +test iocmd-8.21 {fconfigure -encodingprofile badprofile} -body { + fconfigure stdin -encodingprofile froboz +} -returnCodes error -result {bad profile name "froboz": must be replace, strict, or tcl8} + test iocmd-9.1 {eof command} { list [catch {eof} msg] $msg $::errorCode } {1 {wrong # args: should be "eof channelId"} {TCL WRONGARGS}} -- cgit v0.12 From 4d674569535d565275d4a4d4a16a8c63ed7c41f9 Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Thu, 2 Mar 2023 07:08:48 +0000 Subject: Disable more file permissions tests for WSL (not supported in WSL/NTFS) --- tests/chanio.test | 6 ++++-- tests/cmdAH.test | 8 +++++--- tests/fCmd.test | 38 +++++++++++++++++++------------------- tests/io.test | 6 ++++-- tests/tcltest.test | 11 +++++++---- tests/unixFCmd.test | 12 +++++++----- 6 files changed, 46 insertions(+), 35 deletions(-) diff --git a/tests/chanio.test b/tests/chanio.test index ae03d71..0176c13 100644 --- a/tests/chanio.test +++ b/tests/chanio.test @@ -50,6 +50,8 @@ namespace eval ::tcl::test::io { testConstraint notWinCI [expr { $::tcl_platform(platform) ne "windows" || ![info exists ::env(CI)]}] testConstraint notOSX [expr {$::tcl_platform(os) ne "Darwin"}] + # File permissions broken on wsl without some "exotic" wsl configuration + testConstraint notWsl [expr {[llength [array names ::env *WSL*]] == 0}] testConstraint specialfiles [expr {[file exists /dev/zero] || [file exists NUL]}] # You need a *very* special environment to do some tests. In particular, @@ -5348,7 +5350,7 @@ test chan-io-40.1 {POSIX open access modes: RDWR} -setup { } -result {zzy abzzy} test chan-io-40.2 {POSIX open access modes: CREAT} -setup { file delete $path(test3) -} -constraints {unix} -body { +} -constraints {unix notWsl} -body { set f [open $path(test3) {WRONLY CREAT} 0o600] file stat $path(test3) stats set x [format 0o%03o [expr {$stats(mode) & 0o777}]] @@ -5361,7 +5363,7 @@ test chan-io-40.2 {POSIX open access modes: CREAT} -setup { } -result {0o600 {line 1}} test chan-io-40.3 {POSIX open access modes: CREAT} -setup { file delete $path(test3) -} -constraints {unix umask} -body { +} -constraints {unix umask notWsl} -body { # This test only works if your umask is 2, like ouster's. chan close [open $path(test3) {WRONLY CREAT}] file stat $path(test3) stats diff --git a/tests/cmdAH.test b/tests/cmdAH.test index d7a3657..875bacb 100644 --- a/tests/cmdAH.test +++ b/tests/cmdAH.test @@ -32,6 +32,8 @@ testConstraint linkDirectory [expr { && [lindex [file system [temporaryDirectory]] 1] eq "NTFS") }] testConstraint notWine [expr {![info exists ::env(CI_USING_WINE)]}] +# File permissions broken on wsl without some "exotic" wsl configuration +testConstraint notWsl [expr {[llength [array names ::env *WSL*]] == 0}] global env set cmdAHwd [pwd] @@ -1019,7 +1021,7 @@ test cmdAH-16.2 {Tcl_FileObjCmd: readable} { -result 1 } test cmdAH-16.3 {Tcl_FileObjCmd: readable} { - -constraints {unix notRoot testchmod} + -constraints {unix notRoot testchmod notWsl} -setup {testchmod 0o333 $gorpfile} -body {file readable $gorpfile} -result 0 @@ -1052,7 +1054,7 @@ set gorpfile [makeFile abcde gorp.file] test cmdAH-18.1 {Tcl_FileObjCmd: executable} -returnCodes error -body { file executable a b } -result {wrong # args: should be "file executable name"} -test cmdAH-18.2 {Tcl_FileObjCmd: executable} {notRoot} { +test cmdAH-18.2 {Tcl_FileObjCmd: executable} {notRoot notWsl} { file executable $gorpfile } 0 test cmdAH-18.3 {Tcl_FileObjCmd: executable} {unix testchmod} { @@ -1600,7 +1602,7 @@ test cmdAH-28.4 {Tcl_FileObjCmd: stat} -setup { file stat $gorpfile stat list $stat(nlink) $stat(size) $stat(type) } -result {1 12 file} -test cmdAH-28.5 {Tcl_FileObjCmd: stat} -constraints {unix} -setup { +test cmdAH-28.5 {Tcl_FileObjCmd: stat} -constraints {unix notWsl} -setup { unset -nocomplain stat } -body { file stat $gorpfile stat diff --git a/tests/fCmd.test b/tests/fCmd.test index e6fa893..13f4cf1 100644 --- a/tests/fCmd.test +++ b/tests/fCmd.test @@ -43,7 +43,7 @@ if {[testConstraint win]} { testConstraint notInCIenv [expr {![info exists ::env(CI)] || !$::env(CI)}] # File permissions broken on wsl without some "exotic" wsl configuration -testConstraint notInWsl [expr {[llength [array names ::env *WSL*]] == 0}] +testConstraint notWsl [expr {[llength [array names ::env *WSL*]] == 0}] set tmpspace /tmp;# default value # Find a group that exists on this Unix system, or else skip tests that @@ -357,7 +357,7 @@ test fCmd-4.10 {TclFileMakeDirsCmd: exists, is dir} -setup { } -result {1 1} test fCmd-4.11 {TclFileMakeDirsCmd: doesn't exist: errno != ENOENT} -setup { cleanup -} -constraints {unix notRoot testchmod notInWsl} -returnCodes error -body { +} -constraints {unix notRoot testchmod notWsl} -returnCodes error -body { file mkdir td1/td2/td3 testchmod 0 td1/td2 file mkdir td1/td2/td3/td4 @@ -375,7 +375,7 @@ test fCmd-4.13 {TclFileMakeDirsCmd: doesn't exist: errno == ENOENT} -setup { test fCmd-4.14 {TclFileMakeDirsCmd: TclpCreateDirectory fails} -setup { cleanup file delete -force foo -} -constraints {unix notRoot notInWsl} -body { +} -constraints {unix notRoot notWsl} -body { file mkdir foo file attr foo -perm 0o40000 file mkdir foo/tf1 @@ -500,7 +500,7 @@ test fCmd-6.5 {CopyRenameOneFile: lstat(target) != 0} -setup { } -result {tf2} test fCmd-6.6 {CopyRenameOneFile: errno != ENOENT} -setup { cleanup -} -constraints {unix notRoot testchmod notInWsl} -body { +} -constraints {unix notRoot testchmod notWsl} -body { file mkdir td1 testchmod 0 td1 createfile tf1 @@ -619,7 +619,7 @@ test fCmd-6.22 {CopyRenameOneFile: copy/rename: !S_ISDIR(source)} -setup { } -result [file join $tmpspace tf1] test fCmd-6.23 {CopyRenameOneFile: TclpCopyDirectory failed} -setup { cleanup $tmpspace -} -constraints {xdev notRoot notInWsl} -body { +} -constraints {xdev notRoot notWsl} -body { file mkdir td1/td2/td3 file attributes td1 -permissions 0 file rename td1 $tmpspace @@ -671,7 +671,7 @@ test fCmd-6.27 {CopyRenameOneFile: TclpCopyDirectory failed} -setup { } -match glob -result {error renaming "td1" to "/tmp/tcl*/td1": file already exists} test fCmd-6.28 {CopyRenameOneFile: TclpCopyDirectory failed} -setup { cleanup $tmpspace -} -constraints {notRoot xdev notInWsl} -body { +} -constraints {notRoot xdev notWsl} -body { file mkdir td1/td2/td3 file attributes td1/td2/td3 -permissions 0 file rename td1 $tmpspace @@ -688,7 +688,7 @@ test fCmd-6.29 {CopyRenameOneFile: TclpCopyDirectory passed} -setup { } -result [file join $tmpspace td1 td2] test fCmd-6.30 {CopyRenameOneFile: TclpRemoveDirectory failed} -setup { cleanup $tmpspace -} -constraints {unix notRoot notInWsl} -body { +} -constraints {unix notRoot notWsl} -body { file mkdir foo/bar file attr foo -perm 0o40555 file rename foo/bar $tmpspace @@ -763,7 +763,7 @@ test fCmd-8.3 {file copy and path translation: ensure correct error} -body { test fCmd-9.1 {file rename: comprehensive: EACCES} -setup { cleanup -} -constraints {unix notRoot notInWsl} -body { +} -constraints {unix notRoot notWsl} -body { file mkdir td1 file mkdir td2 file attr td2 -perm 0o40000 @@ -789,7 +789,7 @@ test fCmd-9.3 {file rename: comprehensive: file to new name} -setup { } -result {{tf3 tf4} 1 0} test fCmd-9.4 {file rename: comprehensive: dir to new name} -setup { cleanup -} -constraints {unix notRoot testchmod notDarwin9 notInWsl} -body { +} -constraints {unix notRoot testchmod notDarwin9 notWsl} -body { file mkdir td1 td2 testchmod 0o555 td2 file rename td1 td3 @@ -810,7 +810,7 @@ test fCmd-9.5 {file rename: comprehensive: file to self} -setup { } -result {tf1 tf2 1 0} test fCmd-9.6 {file rename: comprehensive: dir to self} -setup { cleanup -} -constraints {unix notRoot testchmod notInWsl} -body { +} -constraints {unix notRoot testchmod notWsl} -body { file mkdir td1 file mkdir td2 testchmod 0o555 td2 @@ -1004,7 +1004,7 @@ test fCmd-10.2 {file copy: comprehensive: file to new name} -setup { } -result {{tf1 tf2 tf3 tf4} tf1 tf2 1 0} test fCmd-10.3 {file copy: comprehensive: dir to new name} -setup { cleanup -} -constraints {unix notRoot testchmod notInWsl} -body { +} -constraints {unix notRoot testchmod notWsl} -body { file mkdir [file join td1 tdx] file mkdir [file join td2 tdy] testchmod 0o555 td2 @@ -1081,7 +1081,7 @@ test fCmd-10.5 {file copy: comprehensive: dir to empty dir} -setup { } -result [subst {{td1 td2 tdd1 tdd2 tdd3 tdd4 tds1 tds2 tds3 tds4} {1 {error copying "td1" to "[file join td2 td1]": file already exists}} {1 {error copying "tds1" to "[file join tdd1 tds1]": file already exists}} 1 1 1}] test fCmd-10.6 {file copy: comprehensive: dir to non-empty dir} -setup { cleanup -} -constraints {notRoot unixOrWin testchmod notInWsl} -body { +} -constraints {notRoot unixOrWin testchmod notWsl} -body { file mkdir tds1 file mkdir tds2 file mkdir [file join tdd1 tds1 xxx] @@ -1105,7 +1105,7 @@ test fCmd-10.7 {file rename: comprehensive: file to new name and dir} -setup { } -result [subst {{tf1 tf2} {[file join td1 tf3] [file join td1 tf4]} 1 0}] test fCmd-10.8 {file rename: comprehensive: dir to new name and dir} -setup { cleanup -} -constraints {unix notRoot testchmod notInWsl} -body { +} -constraints {unix notRoot testchmod notWsl} -body { file mkdir td1 file mkdir td2 file mkdir td3 @@ -1302,7 +1302,7 @@ test fCmd-12.7 {renamefile: renaming directory into offspring} -setup { } -result {1} test fCmd-12.8 {renamefile: generic error} -setup { catch {file delete -force -- tfa} -} -constraints {unix notRoot notInWsl} -body { +} -constraints {unix notRoot notWsl} -body { file mkdir tfa file mkdir tfa/dir file attributes tfa -permissions 0o555 @@ -1489,7 +1489,7 @@ test fCmd-14.7 {copyfile: copy directory succeeding} -setup { } -result {1 1} test fCmd-14.8 {copyfile: copy directory failing} -setup { catch {file delete -force -- tfa} -} -constraints {unix notRoot notInWsl} -body { +} -constraints {unix notRoot notWsl} -body { file mkdir tfa/dir/a/b/c file attributes tfa/dir -permissions 0 catch {file copy tfa tfa2} @@ -1631,7 +1631,7 @@ test fCmd-16.8 {remove a normal file} -constraints {notRoot} -setup { } -result {1} test fCmd-16.9 {error while deleting file} -setup { catch {file delete -force -- tfa} -} -constraints {unix notRoot notInWsl} -body { +} -constraints {unix notRoot notWsl} -body { file mkdir tfa createfile tfa/a file attributes tfa -permissions 0o555 @@ -1662,7 +1662,7 @@ test fCmd-16.11 {TclFileDeleteCmd: removing a nonexistant file} -setup { # More coverage tests for mkpath() test fCmd-17.1 {mkdir stat failing on target but not ENOENT} -setup { catch {file delete -force -- tfa1} -} -constraints {unix notRoot notInWsl} -body { +} -constraints {unix notRoot notWsl} -body { file mkdir tfa1 file attributes tfa1 -permissions 0o555 catch {file mkdir tfa1/tfa2} @@ -1872,7 +1872,7 @@ test fCmd-19.1 {remove empty directory} -constraints {notRoot} -setup { } -result {0} test fCmd-19.2 {rmdir error besides EEXIST} -setup { catch {file delete -force -- tfa} -} -constraints {unix notRoot notInWsl} -body { +} -constraints {unix notRoot notWsl} -body { file mkdir tfa file mkdir tfa/a file attributes tfa -permissions 0o555 @@ -1900,7 +1900,7 @@ test fCmd-19.3 {recursive remove} -constraints {notRoot} -setup { # test fCmd-20.1 {TraverseUnixTree : failure opening a subdirectory directory} -setup { catch {file delete -force -- tfa} -} -constraints {unix notRoot notInWsl} -body { +} -constraints {unix notRoot notWsl} -body { file mkdir tfa file mkdir tfa/a file attributes tfa/a -permissions 00000 diff --git a/tests/io.test b/tests/io.test index dd291dd..04c0cc8 100644 --- a/tests/io.test +++ b/tests/io.test @@ -48,6 +48,8 @@ testConstraint testservicemode [llength [info commands testservicemode]] testConstraint notWinCI [expr { $::tcl_platform(platform) ne "windows" || ![info exists ::env(CI)]}] testConstraint notOSX [expr {$::tcl_platform(os) ne "Darwin"}] +# File permissions broken on wsl without some "exotic" wsl configuration +testConstraint notWsl [expr {[llength [array names ::env *WSL*]] == 0}] # You need a *very* special environment to do some tests. In # particular, many file systems do not support large-files... @@ -5813,7 +5815,7 @@ test io-40.1 {POSIX open access modes: RDWR} { close $f set x } {zzy abzzy} -test io-40.2 {POSIX open access modes: CREAT} {unix} { +test io-40.2 {POSIX open access modes: CREAT} {unix notWsl} { file delete $path(test3) set f [open $path(test3) {WRONLY CREAT} 0o600] file stat $path(test3) stats @@ -5825,7 +5827,7 @@ test io-40.2 {POSIX open access modes: CREAT} {unix} { close $f set x } {0o600 {line 1}} -test io-40.3 {POSIX open access modes: CREAT} {unix umask} { +test io-40.3 {POSIX open access modes: CREAT} {unix umask notWsl} { # This test only works if your umask is 2, like ouster's. file delete $path(test3) set f [open $path(test3) {WRONLY CREAT}] diff --git a/tests/tcltest.test b/tests/tcltest.test index a9ce785..49f31d5 100644 --- a/tests/tcltest.test +++ b/tests/tcltest.test @@ -22,6 +22,9 @@ if {"::tcltest" ni [namespace children]} { namespace import -force ::tcltest::* } +# File permissions broken on wsl without some "exotic" wsl configuration +testConstraint notWsl [expr {[llength [array names ::env *WSL*]] == 0}] + namespace eval ::tcltest::test { namespace import ::tcltest::* @@ -306,7 +309,7 @@ test tcltest-5.3 {testConstraint - constraint empty (tcltest::safeFetch)} { #} test tcltest-5.5 {InitConstraints: list of built-in constraints} \ - -constraints {!singleTestInterp} \ + -constraints {!singleTestInterp notWsl} \ -setup {tcltest::InitConstraints} \ -body { lsort [array names ::tcltest::testConstraints] } \ -result [lsort { @@ -556,7 +559,7 @@ switch -- $::tcl_platform(platform) { } } test tcltest-8.3 {tcltest a.tcl -tmpdir notReadableDir} { - -constraints {unix notRoot} + -constraints {unix notRoot notWsl} -body { child msg $a -tmpdir $notReadableDir return $msg @@ -572,7 +575,7 @@ testConstraint notFAT [expr { }] # FAT/NTFS permissions are fairly hopeless; ignore this test if that FS is used test tcltest-8.4 {tcltest a.tcl -tmpdir notWriteableDir} { - -constraints {unixOrWin notRoot notFAT} + -constraints {unixOrWin notRoot notFAT notWsl} -body { child msg $a -tmpdir $notWriteableDir return $msg @@ -645,7 +648,7 @@ test tcltest-8.11 {tcltest a.tcl -testdir thisdirectoryisafile} { -result {*not a directory*} } test tcltest-8.12 {tcltest a.tcl -testdir notReadableDir} { - -constraints {unix notRoot} + -constraints {unix notRoot notWsl} -body { child msg $a -testdir $notReadableDir return $msg diff --git a/tests/unixFCmd.test b/tests/unixFCmd.test index 3eade4a..e1084af 100644 --- a/tests/unixFCmd.test +++ b/tests/unixFCmd.test @@ -18,6 +18,8 @@ if {"::tcltest" ni [namespace children]} { catch [list package require -exact tcl::test [info patchlevel]] testConstraint testchmod [llength [info commands testchmod]] +# File permissions broken on wsl without some "exotic" wsl configuration +testConstraint notWsl [expr {[llength [array names ::env *WSL*]] == 0}] # These tests really need to be run from a writable directory, which # it is assumed [temporaryDirectory] is. @@ -94,7 +96,7 @@ if {[testConstraint unix] && [testConstraint notRoot]} { test unixFCmd-1.1 {TclpRenameFile: EACCES} -setup { cleanup -} -constraints {unix notRoot} -body { +} -constraints {unix notRoot notWsl} -body { file mkdir td1/td2/td3 file attributes td1/td2 -permissions 0 file rename td1/td2/td3 td2 @@ -135,7 +137,7 @@ test unixFCmd-1.6 {TclpRenameFile: ENOTDIR} {emptyTest unix notRoot} { } {} test unixFCmd-1.7 {TclpRenameFile: EXDEV} -setup { cleanup -} -constraints {unix notRoot} -body { +} -constraints {unix notRoot notWsl} -body { file mkdir foo/bar file attr foo -perm 0o40555 file rename foo/bar /tmp @@ -219,7 +221,7 @@ test unixFCmd-2.4 {TclpCopyFile: src is fifo} -setup { } -result {fifo fifo} test unixFCmd-2.5 {TclpCopyFile: copy attributes} -setup { cleanup -} -constraints {unix notRoot} -body { +} -constraints {unix notRoot notWsl} -body { close [open tf1 a] file attributes tf1 -permissions 0o472 file copy tf1 tf2 @@ -334,7 +336,7 @@ test unixFCmd-16.3 {SetOwnerAttribute - invalid owner} -setup { test unixFCmd-17.1 {SetPermissionsAttribute} -setup { catch {file delete -force -- foo.test} -} -constraints {unix notRoot} -body { +} -constraints {unix notRoot notWsl} -body { close [open foo.test w] list [file attributes foo.test -permissions 0] \ [file attributes foo.test -permissions] @@ -366,7 +368,7 @@ test unixFCmd-17.4 {SetPermissionsAttribute} -setup { close [open foo.test w] set ::i 4 proc permcheck {testnum permList expected} { - test $testnum {SetPermissionsAttribute} {unix notRoot} { + test $testnum {SetPermissionsAttribute} {unix notRoot notWsl} { set result {} foreach permstr $permList { file attributes foo.test -permissions $permstr -- cgit v0.12 From d3aa6839f45e33d533ae9525378612cb04ab0dd1 Mon Sep 17 00:00:00 2001 From: pooryorick Date: Fri, 3 Mar 2023 12:15:11 +0000 Subject: Fix Valgrind "still reachable" report in TestcmdtokenCmd(). --- generic/tclTest.c | 44 ++++++++++++++++++++++++++++++-------------- tests/basic.test | 6 +++--- tests/cmdInfo.test | 6 +++--- 3 files changed, 36 insertions(+), 20 deletions(-) diff --git a/generic/tclTest.c b/generic/tclTest.c index f4450ff..fbd4774 100644 --- a/generic/tclTest.c +++ b/generic/tclTest.c @@ -1242,7 +1242,7 @@ TestcmdtokenCmd( int argc, /* Number of arguments. */ const char **argv) /* Argument strings. */ { - TestCommandTokenRef *refPtr; + TestCommandTokenRef *refPtr, *prevRefPtr; char buf[30]; int id; @@ -1261,9 +1261,7 @@ TestcmdtokenCmd( firstCommandTokenRef = refPtr; sprintf(buf, "%d", refPtr->id); Tcl_AppendResult(interp, buf, NULL); - } else if (strcmp(argv[1], "name") == 0) { - Tcl_Obj *objPtr; - + } else { if (sscanf(argv[2], "%d", &id) != 1) { Tcl_AppendResult(interp, "bad command token \"", argv[2], "\"", NULL); @@ -1283,18 +1281,36 @@ TestcmdtokenCmd( return TCL_ERROR; } - objPtr = Tcl_NewObj(); - Tcl_GetCommandFullName(interp, refPtr->token, objPtr); + if (strcmp(argv[1], "name") == 0) { + Tcl_Obj *objPtr; - Tcl_AppendElement(interp, - Tcl_GetCommandName(interp, refPtr->token)); - Tcl_AppendElement(interp, Tcl_GetString(objPtr)); - Tcl_DecrRefCount(objPtr); - } else { - Tcl_AppendResult(interp, "bad option \"", argv[1], - "\": must be create or name", NULL); - return TCL_ERROR; + objPtr = Tcl_NewObj(); + Tcl_GetCommandFullName(interp, refPtr->token, objPtr); + + Tcl_AppendElement(interp, + Tcl_GetCommandName(interp, refPtr->token)); + Tcl_AppendElement(interp, Tcl_GetString(objPtr)); + Tcl_DecrRefCount(objPtr); + } else if (strcmp(argv[1], "free") == 0) { + prevRefPtr = NULL; + for (refPtr = firstCommandTokenRef; refPtr != NULL; + refPtr = refPtr->nextPtr) { + if (refPtr->id == id) { + if (prevRefPtr != NULL) { + prevRefPtr->nextPtr = refPtr->nextPtr; + } + ckfree(refPtr); + break; + } + prevRefPtr = refPtr; + } + } else { + Tcl_AppendResult(interp, "bad option \"", argv[1], + "\": must be create, name, or free", NULL); + return TCL_ERROR; + } } + return TCL_OK; } diff --git a/tests/basic.test b/tests/basic.test index f4c57fe..de986c7 100644 --- a/tests/basic.test +++ b/tests/basic.test @@ -336,19 +336,19 @@ test basic-20.1 {Tcl_GetCommandInfo, names for commands created inside namespace }] list [testcmdtoken name $x] \ [rename ::p q] \ - [testcmdtoken name $x] + [testcmdtoken name $x][testcmdtoken free $x] } {{p ::p} {} {q ::q}} test basic-20.2 {Tcl_GetCommandInfo, names for commands created outside namespaces} {testcmdtoken} { catch {rename q ""} set x [testcmdtoken create test_ns_basic::test_ns_basic2::p] list [testcmdtoken name $x] \ [rename test_ns_basic::test_ns_basic2::p q] \ - [testcmdtoken name $x] + [testcmdtoken name $x][testcmdtoken free $x] } {{p ::test_ns_basic::test_ns_basic2::p} {} {q ::q}} test basic-20.3 {Tcl_GetCommandInfo, #-quoting} testcmdtoken { catch {rename \# ""} set x [testcmdtoken create \#] - testcmdtoken name $x + return [testcmdtoken name $x][testcmdtoken free $x] } {{#} ::#} test basic-21.1 {Tcl_GetCommandName} {emptyTest} { diff --git a/tests/cmdInfo.test b/tests/cmdInfo.test index 37b8a0b..ad564d7 100644 --- a/tests/cmdInfo.test +++ b/tests/cmdInfo.test @@ -70,7 +70,7 @@ test cmdinfo-4.1 {Tcl_GetCommandName/Tcl_GetCommandFullName procedures} \ rename x1 newName set y [testcmdtoken name $x] rename newName x1 - lappend y {*}[testcmdtoken name $x] + lappend y {*}[testcmdtoken name $x][testcmdtoken free $x] } {newName ::newName x1 ::x1} catch {rename newTestCmd {}} @@ -87,7 +87,7 @@ test cmdinfo-5.1 {Names for commands created when inside namespaces} \ }] set y [testcmdtoken name $x] rename ::testCmd newTestCmd - lappend y {*}[testcmdtoken name $x] + lappend y {*}[testcmdtoken name $x][testcmdtoken free $x] } {testCmd ::testCmd newTestCmd ::newTestCmd} test cmdinfo-6.1 {Names for commands created when outside namespaces} \ @@ -95,7 +95,7 @@ test cmdinfo-6.1 {Names for commands created when outside namespaces} \ set x [testcmdtoken create cmdInfoNs1::cmdInfoNs2::testCmd] set y [testcmdtoken name $x] rename cmdInfoNs1::cmdInfoNs2::testCmd newTestCmd2 - lappend y {*}[testcmdtoken name $x] + lappend y {*}[testcmdtoken name $x][testcmdtoken free $x] } {testCmd ::cmdInfoNs1::cmdInfoNs2::testCmd newTestCmd2 ::newTestCmd2} # cleanup -- cgit v0.12 From e7b37f1737b55c5d6bfaa56a41432a10e0ed91f9 Mon Sep 17 00:00:00 2001 From: pooryorick Date: Fri, 3 Mar 2023 12:39:09 +0000 Subject: Fix Valgrind "still reachable" report in TestcmdtokenCmd(). --- generic/tclTest.c | 44 ++++++++++++++++++++++++++++++-------------- tests/basic.test | 6 +++--- tests/cmdInfo.test | 6 +++--- 3 files changed, 36 insertions(+), 20 deletions(-) diff --git a/generic/tclTest.c b/generic/tclTest.c index b6c7f77..72eca6c 100644 --- a/generic/tclTest.c +++ b/generic/tclTest.c @@ -1206,7 +1206,7 @@ TestcmdtokenCmd( int argc, /* Number of arguments. */ const char **argv) /* Argument strings. */ { - TestCommandTokenRef *refPtr; + TestCommandTokenRef *refPtr, *prevRefPtr; char buf[30]; int id; @@ -1225,9 +1225,7 @@ TestcmdtokenCmd( firstCommandTokenRef = refPtr; sprintf(buf, "%d", refPtr->id); Tcl_AppendResult(interp, buf, NULL); - } else if (strcmp(argv[1], "name") == 0) { - Tcl_Obj *objPtr; - + } else { if (sscanf(argv[2], "%d", &id) != 1) { Tcl_AppendResult(interp, "bad command token \"", argv[2], "\"", NULL); @@ -1247,18 +1245,36 @@ TestcmdtokenCmd( return TCL_ERROR; } - objPtr = Tcl_NewObj(); - Tcl_GetCommandFullName(interp, refPtr->token, objPtr); + if (strcmp(argv[1], "name") == 0) { + Tcl_Obj *objPtr; - Tcl_AppendElement(interp, - Tcl_GetCommandName(interp, refPtr->token)); - Tcl_AppendElement(interp, Tcl_GetString(objPtr)); - Tcl_DecrRefCount(objPtr); - } else { - Tcl_AppendResult(interp, "bad option \"", argv[1], - "\": must be create or name", NULL); - return TCL_ERROR; + objPtr = Tcl_NewObj(); + Tcl_GetCommandFullName(interp, refPtr->token, objPtr); + + Tcl_AppendElement(interp, + Tcl_GetCommandName(interp, refPtr->token)); + Tcl_AppendElement(interp, Tcl_GetString(objPtr)); + Tcl_DecrRefCount(objPtr); + } else if (strcmp(argv[1], "free") == 0) { + prevRefPtr = NULL; + for (refPtr = firstCommandTokenRef; refPtr != NULL; + refPtr = refPtr->nextPtr) { + if (refPtr->id == id) { + if (prevRefPtr != NULL) { + prevRefPtr->nextPtr = refPtr->nextPtr; + } + ckfree(refPtr); + break; + } + prevRefPtr = refPtr; + } + } else { + Tcl_AppendResult(interp, "bad option \"", argv[1], + "\": must be create, name, or free", NULL); + return TCL_ERROR; + } } + return TCL_OK; } diff --git a/tests/basic.test b/tests/basic.test index f4c57fe..de986c7 100644 --- a/tests/basic.test +++ b/tests/basic.test @@ -336,19 +336,19 @@ test basic-20.1 {Tcl_GetCommandInfo, names for commands created inside namespace }] list [testcmdtoken name $x] \ [rename ::p q] \ - [testcmdtoken name $x] + [testcmdtoken name $x][testcmdtoken free $x] } {{p ::p} {} {q ::q}} test basic-20.2 {Tcl_GetCommandInfo, names for commands created outside namespaces} {testcmdtoken} { catch {rename q ""} set x [testcmdtoken create test_ns_basic::test_ns_basic2::p] list [testcmdtoken name $x] \ [rename test_ns_basic::test_ns_basic2::p q] \ - [testcmdtoken name $x] + [testcmdtoken name $x][testcmdtoken free $x] } {{p ::test_ns_basic::test_ns_basic2::p} {} {q ::q}} test basic-20.3 {Tcl_GetCommandInfo, #-quoting} testcmdtoken { catch {rename \# ""} set x [testcmdtoken create \#] - testcmdtoken name $x + return [testcmdtoken name $x][testcmdtoken free $x] } {{#} ::#} test basic-21.1 {Tcl_GetCommandName} {emptyTest} { diff --git a/tests/cmdInfo.test b/tests/cmdInfo.test index 37b8a0b..ad564d7 100644 --- a/tests/cmdInfo.test +++ b/tests/cmdInfo.test @@ -70,7 +70,7 @@ test cmdinfo-4.1 {Tcl_GetCommandName/Tcl_GetCommandFullName procedures} \ rename x1 newName set y [testcmdtoken name $x] rename newName x1 - lappend y {*}[testcmdtoken name $x] + lappend y {*}[testcmdtoken name $x][testcmdtoken free $x] } {newName ::newName x1 ::x1} catch {rename newTestCmd {}} @@ -87,7 +87,7 @@ test cmdinfo-5.1 {Names for commands created when inside namespaces} \ }] set y [testcmdtoken name $x] rename ::testCmd newTestCmd - lappend y {*}[testcmdtoken name $x] + lappend y {*}[testcmdtoken name $x][testcmdtoken free $x] } {testCmd ::testCmd newTestCmd ::newTestCmd} test cmdinfo-6.1 {Names for commands created when outside namespaces} \ @@ -95,7 +95,7 @@ test cmdinfo-6.1 {Names for commands created when outside namespaces} \ set x [testcmdtoken create cmdInfoNs1::cmdInfoNs2::testCmd] set y [testcmdtoken name $x] rename cmdInfoNs1::cmdInfoNs2::testCmd newTestCmd2 - lappend y {*}[testcmdtoken name $x] + lappend y {*}[testcmdtoken name $x][testcmdtoken free $x] } {testCmd ::cmdInfoNs1::cmdInfoNs2::testCmd newTestCmd2 ::newTestCmd2} # cleanup -- cgit v0.12 From 7f3d1257ffc45e37ee8c71b666c37088eb2f1c48 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Fri, 3 Mar 2023 13:03:02 +0000 Subject: ckfree() shouldn't be used in Tcl 9 core code any more --- generic/tclTest.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/generic/tclTest.c b/generic/tclTest.c index 72eca6c..a5d2e0b 100644 --- a/generic/tclTest.c +++ b/generic/tclTest.c @@ -1263,7 +1263,7 @@ TestcmdtokenCmd( if (prevRefPtr != NULL) { prevRefPtr->nextPtr = refPtr->nextPtr; } - ckfree(refPtr); + Tcl_Free(refPtr); break; } prevRefPtr = refPtr; -- cgit v0.12 From 8f18e27691fabd477dde7d468fa26440ed5da4c2 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Fri, 3 Mar 2023 13:04:11 +0000 Subject: Adapt type-casts to Tcl 9.0 signature of Tcl_Free/Tcl_Realloc/Tcl_AttemptRealloc --- generic/tcl.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/generic/tcl.h b/generic/tcl.h index 1a1452e..ec78052 100644 --- a/generic/tcl.h +++ b/generic/tcl.h @@ -2377,9 +2377,9 @@ EXTERN const char *TclZipfs_AppHook(int *argc, char ***argv); # define attemptckalloc Tcl_AttemptAlloc # ifdef _MSC_VER /* Silence invalid C4090 warnings */ -# define ckfree(a) Tcl_Free((char *)(a)) -# define ckrealloc(a,b) Tcl_Realloc((char *)(a),(b)) -# define attemptckrealloc(a,b) Tcl_AttemptRealloc((char *)(a),(b)) +# define ckfree(a) Tcl_Free((void *)(a)) +# define ckrealloc(a,b) Tcl_Realloc((void *)(a),(b)) +# define attemptckrealloc(a,b) Tcl_AttemptRealloc((void *)(a),(b)) # else # define ckfree Tcl_Free # define ckrealloc Tcl_Realloc -- cgit v0.12 From 6285c1336732a6a7db1fc3627dad6fe6176fbee6 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Sat, 4 Mar 2023 10:08:18 +0000 Subject: Fix [1b8df10110]: Tcl_GetTime returns wrong usec value on Windows --- generic/tclDecls.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/generic/tclDecls.h b/generic/tclDecls.h index 6723069..8fc926c 100644 --- a/generic/tclDecls.h +++ b/generic/tclDecls.h @@ -4035,7 +4035,7 @@ extern const TclStubs *tclStubsPtr; /* Handle Win64 tk.dll being loaded in Cygwin64. */ # define Tcl_GetTime(t) \ do { \ - union { \ + struct { \ Tcl_Time now; \ __int64 reserved; \ } _t; \ -- cgit v0.12 From e13edcba869deda8b613854d533c106c9855b61d Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Sat, 4 Mar 2023 10:18:41 +0000 Subject: Test constraint notInCIenv no longer necessary (due to previous fix) --- tests/winTime.test | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tests/winTime.test b/tests/winTime.test index 68be966..ed8b625 100644 --- a/tests/winTime.test +++ b/tests/winTime.test @@ -19,9 +19,6 @@ if {"::tcltest" ni [namespace children]} { catch [list package require -exact Tcltest [info patchlevel]] testConstraint testwinclock [llength [info commands testwinclock]] -# Some things fail under all Continuous Integration systems for subtle reasons -# such as CI often running with elevated privileges in a container. -testConstraint notInCIenv [expr {![info exists ::env(CI)]}] # The next two tests will crash on Windows if the check for negative # clock values is not done properly. @@ -43,7 +40,7 @@ test winTime-1.2 {TclpGetDate} {win} { # with the Windows clock. 30 sec really isn't enough, # but how much time does a tester have patience for? -test winTime-2.1 {Synchronization of Tcl and Windows clocks} {testwinclock notInCIenv} { +test winTime-2.1 {Synchronization of Tcl and Windows clocks} testwinclock { # May fail due to OS/hardware discrepancies. See: # http://support.microsoft.com/default.aspx?scid=kb;en-us;274323 set failed {} -- cgit v0.12 From 0229ba1283c2457c63df5674f54831eeb4a120ca Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Sat, 4 Mar 2023 12:13:31 +0000 Subject: Bug [9c5a00c69d]. Tilde expansion on Windows --- win/tclWinFile.c | 73 +++++++++++++++++++++++++++----------------------------- 1 file changed, 35 insertions(+), 38 deletions(-) diff --git a/win/tclWinFile.c b/win/tclWinFile.c index 9a6c5f1..639cd72 100644 --- a/win/tclWinFile.c +++ b/win/tclWinFile.c @@ -1458,22 +1458,43 @@ TclpGetUserHome( if (domain == NULL) { const char *ptr; - /* - * No domain. Firstly check it's the current user - */ - + /* + * Treat the current user as a special case because the general case + * below does not properly retrieve the path. The NetUserGetInfo + * call returns an empty path and the code defaults to the user's + * name in the profiles directory. On modern Windows systems, this + * is generally wrong as when the account is a Microsoft account, + * for example abcdefghi@outlook.com, the directory name is + * abcde and not abcdefghi. + * + * Note we could have just used env(USERPROFILE) here but + * the intent is to retrieve (as on Unix) the system's view + * of the home irrespective of environment settings of HOME + * and USERPROFILE. + * + * Fixing this for the general user needs more investigating but + * at least for the current user we can use a direct call. + */ ptr = TclpGetUserName(&ds); if (ptr != NULL && strcasecmp(name, ptr) == 0) { - /* - * Try safest and fastest way to get current user home - */ - - ptr = TclGetEnv("HOME", &ds); - if (ptr != NULL) { - Tcl_JoinPath(1, &ptr, bufferPtr); - rc = 1; - result = Tcl_DStringValue(bufferPtr); - } + HANDLE hProcess; + WCHAR buf[MAX_PATH]; + DWORD nChars = sizeof(buf) / sizeof(buf[0]); + /* Sadly GetCurrentProcessToken not in Win 7 so slightly longer */ + hProcess = GetCurrentProcess(); /* Need not be closed */ + if (hProcess) { + HANDLE hToken; + if (OpenProcessToken(hProcess, TOKEN_QUERY, &hToken)) { + if (GetUserProfileDirectoryW(hToken, buf, &nChars)) { + Tcl_WinTCharToUtf((TCHAR *)buf, + (nChars-1)*sizeof(WCHAR), + bufferPtr); + result = Tcl_DStringValue(bufferPtr); + rc = 1; + } + CloseHandle(hToken); + } + } } Tcl_DStringFree(&ds); } else { @@ -1542,30 +1563,6 @@ TclpGetUserHome( if (wDomain != NULL) { NetApiBufferFree((void *) wDomain); } - if (result == NULL) { - /* - * Look in the "Password Lists" section of system.ini for the local - * user. There are also entries in that section that begin with a "*" - * character that are used by Windows for other purposes; ignore user - * names beginning with a "*". - */ - - char buf[MAX_PATH]; - - if (name[0] != '*') { - if (GetPrivateProfileStringA("Password Lists", name, "", buf, - MAX_PATH, "system.ini") > 0) { - /* - * User exists, but there is no such thing as a home directory - * in system.ini. Return "{Windows drive}:/". - */ - - GetWindowsDirectoryA(buf, MAX_PATH); - Tcl_DStringAppend(bufferPtr, buf, 3); - result = Tcl_DStringValue(bufferPtr); - } - } - } return result; } -- cgit v0.12 From c862e5709590a9330c9b814109a0fbfd70f027cb Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Sat, 4 Mar 2023 15:11:01 +0000 Subject: Add test for [9c5a00c69d], tilde expansion of ~user --- tests/fileSystem.test | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tests/fileSystem.test b/tests/fileSystem.test index f363d86..2de778a 100644 --- a/tests/fileSystem.test +++ b/tests/fileSystem.test @@ -277,6 +277,16 @@ test filesystem-1.30.1 {normalisation of existing user} -body { test filesystem-1.30.2 {normalisation of nonexistent user specified as user@domain} -body { file normalize ~nonexistentuser@nonexistentdomain } -returnCodes error -result {user "nonexistentuser@nonexistentdomain" doesn't exist} +test filesystem-1.30.3 {file normalization should distinguish between ~ and ~user} -setup { + set oldhome $::env(HOME) + set olduserhome [file normalize ~$::tcl_platform(user)] + set ::env(HOME) [file join $oldhome temp] +} -cleanup { + set env(HOME) $oldhome +} -body { + list [string equal [file normalize ~] $::env(HOME)] \ + [string equal $olduserhome [file normalize ~$::tcl_platform(user)]] +} -result {1 1} test filesystem-1.31 {link normalisation: link near filesystem root} {testsetplatform} { testsetplatform unix file normalize /foo/../bar -- cgit v0.12 From 7b3ef36925e938aa7a1aff22d3d3e521e32f243d Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Sat, 4 Mar 2023 16:26:00 +0000 Subject: Protect zlib errors with check for null interp --- generic/tclZlib.c | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/generic/tclZlib.c b/generic/tclZlib.c index ce8da3c..cd3b3c5 100644 --- a/generic/tclZlib.c +++ b/generic/tclZlib.c @@ -449,12 +449,16 @@ GenerateHeader( headerPtr->nativeCommentBuf, MAX_COMMENT_LEN-1, NULL, &len, NULL); if (result != TCL_OK) { - if (result == TCL_CONVERT_UNKNOWN) { - Tcl_AppendResult(interp, "Comment contains characters > 0xFF", NULL); - } else { - Tcl_AppendResult(interp, "Comment too large for zip", NULL); + if (interp) { + if (result == TCL_CONVERT_UNKNOWN) { + Tcl_AppendResult( + interp, "Comment contains characters > 0xFF", NULL); + } + else { + Tcl_AppendResult(interp, "Comment too large for zip", NULL); + } } - result = TCL_ERROR; + result = TCL_ERROR; /* TCL_CONVERT_* -> TCL_ERROR*/ goto error; } headerPtr->nativeCommentBuf[len] = '\0'; @@ -481,12 +485,17 @@ GenerateHeader( headerPtr->nativeFilenameBuf, MAXPATHLEN-1, NULL, &len, NULL); if (result != TCL_OK) { - if (result == TCL_CONVERT_UNKNOWN) { - Tcl_AppendResult(interp, "Filename contains characters > 0xFF", NULL); - } else { - Tcl_AppendResult(interp, "Filename too large for zip", NULL); + if (interp) { + if (result == TCL_CONVERT_UNKNOWN) { + Tcl_AppendResult( + interp, "Filename contains characters > 0xFF", NULL); + } + else { + Tcl_AppendResult( + interp, "Filename too large for zip", NULL); + } } - result = TCL_ERROR; + result = TCL_ERROR; /* TCL_CONVERT_* -> TCL_ERROR*/ goto error; } headerPtr->nativeFilenameBuf[len] = '\0'; -- cgit v0.12 From 33d81b98be1160ae0475a3d162cec7359264c8c8 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Sat, 4 Mar 2023 18:12:49 +0000 Subject: Make dltest/pkg*.c simple example how to use Tcl_Size with Tcl_GetStringFromObj() --- unix/dltest/pkga.c | 2 +- unix/dltest/pkgua.c | 2 +- "unix/dltest/pkg\317\200.c" | 3 --- 3 files changed, 2 insertions(+), 5 deletions(-) diff --git a/unix/dltest/pkga.c b/unix/dltest/pkga.c index 579c323..aacb9cd 100644 --- a/unix/dltest/pkga.c +++ b/unix/dltest/pkga.c @@ -40,7 +40,7 @@ Pkga_EqObjCmd( { int result; const char *str1, *str2; - int len1, len2; + Tcl_Size len1, len2; (void)dummy; if (objc != 3) { diff --git a/unix/dltest/pkgua.c b/unix/dltest/pkgua.c index 16684a8..b14fca8 100644 --- a/unix/dltest/pkgua.c +++ b/unix/dltest/pkgua.c @@ -127,7 +127,7 @@ PkguaEqObjCmd( { int result; const char *str1, *str2; - int len1, len2; + Tcl_Size len1, len2; (void)dummy; if (objc != 3) { diff --git "a/unix/dltest/pkg\317\200.c" "b/unix/dltest/pkg\317\200.c" index dc01fbd..58b36db 100644 --- "a/unix/dltest/pkg\317\200.c" +++ "b/unix/dltest/pkg\317\200.c" @@ -38,9 +38,6 @@ Pkg\u03C0_\u03A0ObjCmd( int objc, /* Number of arguments. */ Tcl_Obj *const objv[]) /* Argument objects. */ { - int result; - const char *str1, *str2; - int len1, len2; (void)dummy; if (objc != 1) { -- cgit v0.12 From 923ff1e3ca4171dd5d562edfcfc4aaab9dfb8d7a Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Sun, 5 Mar 2023 00:26:54 +0000 Subject: More -1 -> TCL_INDEX_NONE --- generic/tclArithSeries.c | 4 +-- generic/tclCkalloc.c | 2 +- generic/tclCompCmds.c | 32 +++++++++--------- generic/tclCompCmdsSZ.c | 10 +++--- generic/tclCompile.c | 2 +- generic/tclConfig.c | 12 +++---- generic/tclDictObj.c | 10 +++--- generic/tclEnv.c | 8 ++--- generic/tclExecute.c | 2 +- generic/tclFCmd.c | 4 +-- generic/tclHistory.c | 2 +- generic/tclIOCmd.c | 2 +- generic/tclLink.c | 10 +++--- generic/tclListObj.c | 2 +- generic/tclLiteral.c | 2 +- generic/tclLoad.c | 26 +++++++-------- generic/tclLoadNone.c | 2 +- generic/tclOOCall.c | 2 +- generic/tclOOMethod.c | 10 +++--- generic/tclPathObj.c | 2 +- generic/tclPkg.c | 22 ++++++------- generic/tclProc.c | 18 +++++----- generic/tclRegexp.c | 6 ++-- generic/tclResult.c | 6 ++-- generic/tclStrToD.c | 4 +-- generic/tclStringObj.c | 6 ++-- generic/tclStubInit.c | 4 +-- generic/tclTestObj.c | 42 ++++++++++++------------ generic/tclTestProcBodyObj.c | 2 +- generic/tclThreadTest.c | 10 +++--- generic/tclTimer.c | 28 ++++++++-------- generic/tclZipfs.c | 78 ++++++++++++++++++++++---------------------- generic/tclZlib.c | 62 +++++++++++++++++------------------ macosx/tclMacOSXFCmd.c | 6 ++-- 34 files changed, 220 insertions(+), 220 deletions(-) diff --git a/generic/tclArithSeries.c b/generic/tclArithSeries.c index 48efa8c..0232746 100755 --- a/generic/tclArithSeries.c +++ b/generic/tclArithSeries.c @@ -357,7 +357,7 @@ TclNewArithSeriesObj( if (TCL_MAJOR_VERSION < 9 && len > ListSizeT_MAX) { Tcl_SetObjResult( interp, - Tcl_NewStringObj("max length of a Tcl list exceeded", -1)); + Tcl_NewStringObj("max length of a Tcl list exceeded", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "MEMORY", NULL); return TCL_ERROR; } @@ -865,7 +865,7 @@ TclArithSeriesGetElements( if (interp) { Tcl_SetObjResult( interp, - Tcl_NewStringObj("max length of a Tcl list exceeded", -1)); + Tcl_NewStringObj("max length of a Tcl list exceeded", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "MEMORY", NULL); } return TCL_ERROR; diff --git a/generic/tclCkalloc.c b/generic/tclCkalloc.c index f7cab9f..6f31940 100644 --- a/generic/tclCkalloc.c +++ b/generic/tclCkalloc.c @@ -189,7 +189,7 @@ TclDumpMemoryInfo( fprintf((FILE *)clientData, "%s", buf); } else { /* Assume objPtr to append to */ - Tcl_AppendToObj((Tcl_Obj *) clientData, buf, -1); + Tcl_AppendToObj((Tcl_Obj *) clientData, buf, TCL_INDEX_NONE); } return 1; } diff --git a/generic/tclCompCmds.c b/generic/tclCompCmds.c index cb3cf1e..3a61a94 100644 --- a/generic/tclCompCmds.c +++ b/generic/tclCompCmds.c @@ -2300,7 +2300,7 @@ PrintDictUpdateInfo( for (i=0 ; ilength ; i++) { if (i) { - Tcl_AppendToObj(appendObj, ", ", -1); + Tcl_AppendToObj(appendObj, ", ", TCL_INDEX_NONE); } Tcl_AppendPrintfToObj(appendObj, "%%v%" TCL_Z_MODIFIER "u", duiPtr->varIndices[i]); } @@ -2322,7 +2322,7 @@ DisassembleDictUpdateInfo( Tcl_ListObjAppendElement(NULL, variables, Tcl_NewWideIntObj(duiPtr->varIndices[i])); } - Tcl_DictObjPut(NULL, dictObj, Tcl_NewStringObj("variables", -1), + Tcl_DictObjPut(NULL, dictObj, Tcl_NewStringObj("variables", TCL_INDEX_NONE), variables); } @@ -2982,11 +2982,11 @@ PrintForeachInfo( ForeachVarList *varsPtr; size_t i, j; - Tcl_AppendToObj(appendObj, "data=[", -1); + Tcl_AppendToObj(appendObj, "data=[", TCL_INDEX_NONE); for (i=0 ; inumLists ; i++) { if (i) { - Tcl_AppendToObj(appendObj, ", ", -1); + Tcl_AppendToObj(appendObj, ", ", TCL_INDEX_NONE); } Tcl_AppendPrintfToObj(appendObj, "%%v%" TCL_Z_MODIFIER "u", (infoPtr->firstValueTemp + i)); @@ -2995,19 +2995,19 @@ PrintForeachInfo( infoPtr->loopCtTemp); for (i=0 ; inumLists ; i++) { if (i) { - Tcl_AppendToObj(appendObj, ",", -1); + Tcl_AppendToObj(appendObj, ",", TCL_INDEX_NONE); } Tcl_AppendPrintfToObj(appendObj, "\n\t\t it%%v%" TCL_Z_MODIFIER "u\t[", (infoPtr->firstValueTemp + i)); varsPtr = infoPtr->varLists[i]; for (j=0 ; jnumVars ; j++) { if (j) { - Tcl_AppendToObj(appendObj, ", ", -1); + Tcl_AppendToObj(appendObj, ", ", TCL_INDEX_NONE); } Tcl_AppendPrintfToObj(appendObj, "%%v%" TCL_Z_MODIFIER "u", varsPtr->varIndexes[j]); } - Tcl_AppendToObj(appendObj, "]", -1); + Tcl_AppendToObj(appendObj, "]", TCL_INDEX_NONE); } } @@ -3026,18 +3026,18 @@ PrintNewForeachInfo( infoPtr->loopCtTemp); for (i=0 ; inumLists ; i++) { if (i) { - Tcl_AppendToObj(appendObj, ",", -1); + Tcl_AppendToObj(appendObj, ",", TCL_INDEX_NONE); } - Tcl_AppendToObj(appendObj, "[", -1); + Tcl_AppendToObj(appendObj, "[", TCL_INDEX_NONE); varsPtr = infoPtr->varLists[i]; for (j=0 ; jnumVars ; j++) { if (j) { - Tcl_AppendToObj(appendObj, ",", -1); + Tcl_AppendToObj(appendObj, ",", TCL_INDEX_NONE); } Tcl_AppendPrintfToObj(appendObj, "%%v%" TCL_Z_MODIFIER "u", varsPtr->varIndexes[j]); } - Tcl_AppendToObj(appendObj, "]", -1); + Tcl_AppendToObj(appendObj, "]", TCL_INDEX_NONE); } } @@ -3062,13 +3062,13 @@ DisassembleForeachInfo( Tcl_ListObjAppendElement(NULL, objPtr, Tcl_NewWideIntObj(infoPtr->firstValueTemp + i)); } - Tcl_DictObjPut(NULL, dictObj, Tcl_NewStringObj("data", -1), objPtr); + Tcl_DictObjPut(NULL, dictObj, Tcl_NewStringObj("data", TCL_INDEX_NONE), objPtr); /* * Loop counter. */ - Tcl_DictObjPut(NULL, dictObj, Tcl_NewStringObj("loop", -1), + Tcl_DictObjPut(NULL, dictObj, Tcl_NewStringObj("loop", TCL_INDEX_NONE), Tcl_NewWideIntObj(infoPtr->loopCtTemp)); /* @@ -3085,7 +3085,7 @@ DisassembleForeachInfo( } Tcl_ListObjAppendElement(NULL, objPtr, innerPtr); } - Tcl_DictObjPut(NULL, dictObj, Tcl_NewStringObj("assign", -1), objPtr); + Tcl_DictObjPut(NULL, dictObj, Tcl_NewStringObj("assign", TCL_INDEX_NONE), objPtr); } static void @@ -3104,7 +3104,7 @@ DisassembleNewForeachInfo( * Jump offset. */ - Tcl_DictObjPut(NULL, dictObj, Tcl_NewStringObj("jumpOffset", -1), + Tcl_DictObjPut(NULL, dictObj, Tcl_NewStringObj("jumpOffset", TCL_INDEX_NONE), Tcl_NewWideIntObj(infoPtr->loopCtTemp)); /* @@ -3121,7 +3121,7 @@ DisassembleNewForeachInfo( } Tcl_ListObjAppendElement(NULL, objPtr, innerPtr); } - Tcl_DictObjPut(NULL, dictObj, Tcl_NewStringObj("assign", -1), objPtr); + Tcl_DictObjPut(NULL, dictObj, Tcl_NewStringObj("assign", TCL_INDEX_NONE), objPtr); } /* diff --git a/generic/tclCompCmdsSZ.c b/generic/tclCompCmdsSZ.c index 0e98584..b86aa43 100644 --- a/generic/tclCompCmdsSZ.c +++ b/generic/tclCompCmdsSZ.c @@ -2604,9 +2604,9 @@ PrintJumptableInfo( offset = PTR2INT(Tcl_GetHashValue(hPtr)); if (i++) { - Tcl_AppendToObj(appendObj, ", ", -1); + Tcl_AppendToObj(appendObj, ", ", TCL_INDEX_NONE); if (i%4==0) { - Tcl_AppendToObj(appendObj, "\n\t\t", -1); + Tcl_AppendToObj(appendObj, "\n\t\t", TCL_INDEX_NONE); } } Tcl_AppendPrintfToObj(appendObj, "\"%s\"->pc %" TCL_Z_MODIFIER "u", @@ -2633,10 +2633,10 @@ DisassembleJumptableInfo( for (; hPtr ; hPtr = Tcl_NextHashEntry(&search)) { keyPtr = (const char *)Tcl_GetHashKey(&jtPtr->hashTable, hPtr); offset = PTR2INT(Tcl_GetHashValue(hPtr)); - Tcl_DictObjPut(NULL, mapping, Tcl_NewStringObj(keyPtr, -1), + Tcl_DictObjPut(NULL, mapping, Tcl_NewStringObj(keyPtr, TCL_INDEX_NONE), Tcl_NewWideIntObj(offset)); } - Tcl_DictObjPut(NULL, dictObj, Tcl_NewStringObj("mapping", -1), mapping); + Tcl_DictObjPut(NULL, dictObj, Tcl_NewStringObj("mapping", TCL_INDEX_NONE), mapping); } /* @@ -4081,7 +4081,7 @@ CompileAssociativeBinaryOpCmd( CompileWord(envPtr, tokenPtr, interp, words); } if (parsePtr->numWords <= 2) { - PushLiteral(envPtr, identity, -1); + PushLiteral(envPtr, identity, TCL_INDEX_NONE); words++; } if (words > 3) { diff --git a/generic/tclCompile.c b/generic/tclCompile.c index 9708255..be308e3 100644 --- a/generic/tclCompile.c +++ b/generic/tclCompile.c @@ -2161,7 +2161,7 @@ TclCompileScript( */ if (iPtr->numLevels / 5 > iPtr->maxNestingDepth / 4) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "too many nested compilations (infinite loop?)", -1)); + "too many nested compilations (infinite loop?)", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "LIMIT", "STACK", NULL); TclCompileSyntaxError(interp, envPtr); return; diff --git a/generic/tclConfig.c b/generic/tclConfig.c index 1ece31c..17490bd 100644 --- a/generic/tclConfig.c +++ b/generic/tclConfig.c @@ -85,7 +85,7 @@ Tcl_RegisterConfig( } else { cdPtr->encoding = NULL; } - cdPtr->pkg = Tcl_NewStringObj(pkgName, -1); + cdPtr->pkg = Tcl_NewStringObj(pkgName, TCL_INDEX_NONE); /* * Phase I: Adding the provided information to the internal database of @@ -127,7 +127,7 @@ Tcl_RegisterConfig( */ for (cfg=configuration ; cfg->key!=NULL && cfg->key[0]!='\0' ; cfg++) { - Tcl_DictObjPut(interp, pkgDict, Tcl_NewStringObj(cfg->key, -1), + Tcl_DictObjPut(interp, pkgDict, Tcl_NewStringObj(cfg->key, TCL_INDEX_NONE), Tcl_NewByteArrayObj((unsigned char *)cfg->value, strlen(cfg->value))); } @@ -144,7 +144,7 @@ Tcl_RegisterConfig( Tcl_DStringInit(&cmdName); TclDStringAppendLiteral(&cmdName, "::"); - Tcl_DStringAppend(&cmdName, pkgName, -1); + Tcl_DStringAppend(&cmdName, pkgName, TCL_INDEX_NONE); /* * The incomplete command name is the name of the namespace to place it @@ -227,7 +227,7 @@ QueryConfigObjCmd( * present. */ - Tcl_SetObjResult(interp, Tcl_NewStringObj("package not known", -1)); + Tcl_SetObjResult(interp, Tcl_NewStringObj("package not known", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "FATAL", "PKGCFG_BASE", TclGetString(pkgName), NULL); return TCL_ERROR; @@ -242,7 +242,7 @@ QueryConfigObjCmd( if (Tcl_DictObjGet(interp, pkgDict, objv[2], &val) != TCL_OK || val == NULL) { - Tcl_SetObjResult(interp, Tcl_NewStringObj("key not known", -1)); + Tcl_SetObjResult(interp, Tcl_NewStringObj("key not known", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "LOOKUP", "CONFIG", TclGetString(objv[2]), NULL); return TCL_ERROR; @@ -279,7 +279,7 @@ QueryConfigObjCmd( if (!listPtr) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "insufficient memory to create list", -1)); + "insufficient memory to create list", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "MEMORY", NULL); return TCL_ERROR; } diff --git a/generic/tclDictObj.c b/generic/tclDictObj.c index 04a909f..5c18c8a 100644 --- a/generic/tclDictObj.c +++ b/generic/tclDictObj.c @@ -717,7 +717,7 @@ SetDictFromAny( missingValue: if (interp != NULL) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "missing value to go with key", -1)); + "missing value to go with key", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "VALUE", "DICTIONARY", NULL); } errorInFindDictElement: @@ -2119,7 +2119,7 @@ DictInfoCmd( } statsStr = Tcl_HashStats(&dict->table); - Tcl_SetObjResult(interp, Tcl_NewStringObj(statsStr, -1)); + Tcl_SetObjResult(interp, Tcl_NewStringObj(statsStr, TCL_INDEX_NONE)); Tcl_Free(statsStr); return TCL_OK; } @@ -2481,7 +2481,7 @@ DictForNRCmd( } if (varc != 2) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "must have exactly two variable names", -1)); + "must have exactly two variable names", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "SYNTAX", "dict", "for", NULL); return TCL_ERROR; } @@ -2676,7 +2676,7 @@ DictMapNRCmd( } if (varc != 2) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "must have exactly two variable names", -1)); + "must have exactly two variable names", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "SYNTAX", "dict", "map", NULL); return TCL_ERROR; } @@ -3116,7 +3116,7 @@ DictFilterCmd( } if (varc != 2) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "must have exactly two variable names", -1)); + "must have exactly two variable names", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "SYNTAX", "dict", "filter", NULL); return TCL_ERROR; } diff --git a/generic/tclEnv.c b/generic/tclEnv.c index 630e89c..6dae72a 100644 --- a/generic/tclEnv.c +++ b/generic/tclEnv.c @@ -185,8 +185,8 @@ TclSetupEnv( p1 = "COMSPEC"; } #endif - obj1 = Tcl_NewStringObj(p1, -1); - obj2 = Tcl_NewStringObj(p2, -1); + obj1 = Tcl_NewStringObj(p1, TCL_INDEX_NONE); + obj2 = Tcl_NewStringObj(p2, TCL_INDEX_NONE); Tcl_DStringFree(&envString); Tcl_IncrRefCount(obj1); @@ -406,7 +406,7 @@ Tcl_PutEnv( * name and value parts, and call TclSetEnv to do all of the real work. */ - name = Tcl_ExternalToUtfDString(NULL, assignment, -1, &nameString); + name = Tcl_ExternalToUtfDString(NULL, assignment, TCL_INDEX_NONE, &nameString); value = (char *)strchr(name, '='); if ((value != NULL) && (value != name)) { @@ -582,7 +582,7 @@ TclGetEnv( if (*result == '=') { result++; Tcl_DStringInit(valuePtr); - Tcl_DStringAppend(valuePtr, result, -1); + Tcl_DStringAppend(valuePtr, result, TCL_INDEX_NONE); result = Tcl_DStringValue(valuePtr); } else { result = NULL; diff --git a/generic/tclExecute.c b/generic/tclExecute.c index 1e23517..81ce1a7 100644 --- a/generic/tclExecute.c +++ b/generic/tclExecute.c @@ -5039,7 +5039,7 @@ TEBCresume( case INST_LREPLACE4: { - Tcl_Size numToDelete, numNewElems; + size_t numToDelete, numNewElems; int end_indicator; int haveSecondIndex, flags; Tcl_Obj *fromIdxObj, *toIdxObj; diff --git a/generic/tclFCmd.c b/generic/tclFCmd.c index 89550d9..c1dbc88 100644 --- a/generic/tclFCmd.c +++ b/generic/tclFCmd.c @@ -1042,7 +1042,7 @@ TclFileAttrsCmd( res = Tcl_FSFileAttrsGet(interp, index, filePtr, &objPtrAttr); if (res == TCL_OK) { Tcl_Obj *objPtr = - Tcl_NewStringObj(attributeStrings[index], -1); + Tcl_NewStringObj(attributeStrings[index], TCL_INDEX_NONE); Tcl_ListObjAppendElement(interp, listPtr, objPtr); Tcl_ListObjAppendElement(interp, listPtr, objPtrAttr); @@ -1492,7 +1492,7 @@ TclFileTemporaryCmd( return TCL_ERROR; } } - Tcl_SetObjResult(interp, Tcl_NewStringObj(Tcl_GetChannelName(chan), -1)); + Tcl_SetObjResult(interp, Tcl_NewStringObj(Tcl_GetChannelName(chan), TCL_INDEX_NONE)); return TCL_OK; } diff --git a/generic/tclHistory.c b/generic/tclHistory.c index dc5a67d..8083b4d 100644 --- a/generic/tclHistory.c +++ b/generic/tclHistory.c @@ -69,7 +69,7 @@ Tcl_RecordAndEval( * Call Tcl_RecordAndEvalObj to do the actual work. */ - cmdPtr = Tcl_NewStringObj(cmd, -1); + cmdPtr = Tcl_NewStringObj(cmd, TCL_INDEX_NONE); Tcl_IncrRefCount(cmdPtr); result = Tcl_RecordAndEvalObj(interp, cmdPtr, flags); diff --git a/generic/tclIOCmd.c b/generic/tclIOCmd.c index 197ca32..2298d48 100644 --- a/generic/tclIOCmd.c +++ b/generic/tclIOCmd.c @@ -1080,7 +1080,7 @@ Tcl_OpenObjCmd( if (objc == 4) { const char *permString = TclGetString(objv[3]); int code = TCL_ERROR; - int scanned = TclParseAllWhiteSpace(permString, -1); + int scanned = TclParseAllWhiteSpace(permString, TCL_INDEX_NONE); /* * Support legacy octal numbers. diff --git a/generic/tclLink.c b/generic/tclLink.c index 37c104b..eec778a 100644 --- a/generic/tclLink.c +++ b/generic/tclLink.c @@ -175,7 +175,7 @@ Tcl_LinkVar( linkPtr = (Link *)Tcl_Alloc(sizeof(Link)); linkPtr->interp = interp; linkPtr->nsPtr = NULL; - linkPtr->varName = Tcl_NewStringObj(varName, -1); + linkPtr->varName = Tcl_NewStringObj(varName, TCL_INDEX_NONE); Tcl_IncrRefCount(linkPtr->varName); linkPtr->addr = addr; linkPtr->type = type & ~TCL_LINK_READ_ONLY; @@ -256,7 +256,7 @@ Tcl_LinkArray( if (size < 1) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "wrong array size given", -1)); + "wrong array size given", TCL_INDEX_NONE)); return TCL_ERROR; } @@ -340,7 +340,7 @@ Tcl_LinkArray( default: LinkFree(linkPtr); Tcl_SetObjResult(interp, Tcl_NewStringObj( - "bad linked array variable type", -1)); + "bad linked array variable type", TCL_INDEX_NONE)); return TCL_ERROR; } @@ -380,7 +380,7 @@ Tcl_LinkArray( */ linkPtr->interp = interp; - linkPtr->varName = Tcl_NewStringObj(varName, -1); + linkPtr->varName = Tcl_NewStringObj(varName, TCL_INDEX_NONE); Tcl_IncrRefCount(linkPtr->varName); TclGetNamespaceForQualName(interp, varName, NULL, TCL_GLOBAL_ONLY, @@ -1433,7 +1433,7 @@ ObjValue( TclNewLiteralStringObj(resultObj, "NULL"); return resultObj; } - return Tcl_NewStringObj(p, -1); + return Tcl_NewStringObj(p, TCL_INDEX_NONE); case TCL_LINK_CHARS: if (linkPtr->flags & LINK_ALLOC_LAST) { diff --git a/generic/tclListObj.c b/generic/tclListObj.c index 9102af0..7cf96cb 100644 --- a/generic/tclListObj.c +++ b/generic/tclListObj.c @@ -526,7 +526,7 @@ ListLimitExceededError(Tcl_Interp *interp) if (interp != NULL) { Tcl_SetObjResult( interp, - Tcl_NewStringObj("max length of a Tcl list exceeded", -1)); + Tcl_NewStringObj("max length of a Tcl list exceeded", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "MEMORY", NULL); } return TCL_ERROR; diff --git a/generic/tclLiteral.c b/generic/tclLiteral.c index dfb92cb..24e99fc 100644 --- a/generic/tclLiteral.c +++ b/generic/tclLiteral.c @@ -1057,7 +1057,7 @@ TclInvalidateCmdLiteral( { Interp *iPtr = (Interp *) interp; Tcl_Obj *literalObjPtr = TclCreateLiteral(iPtr, name, - strlen(name), -1, NULL, nsPtr, 0, NULL); + strlen(name), TCL_INDEX_NONE, NULL, nsPtr, 0, NULL); if (literalObjPtr != NULL) { if (TclHasInternalRep(literalObjPtr, &tclCmdNameType)) { diff --git a/generic/tclLoad.c b/generic/tclLoad.c index fa0b584..b66122d 100644 --- a/generic/tclLoad.c +++ b/generic/tclLoad.c @@ -192,7 +192,7 @@ Tcl_LoadObjCmd( } if ((fullFileName[0] == 0) && (prefix == NULL)) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "must specify either file name or prefix", -1)); + "must specify either file name or prefix", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "OPERATION", "LOAD", "NOLIBRARY", NULL); code = TCL_ERROR; @@ -232,9 +232,9 @@ Tcl_LoadObjCmd( namesMatch = 0; } else { TclDStringClear(&pfx); - Tcl_DStringAppend(&pfx, prefix, -1); + Tcl_DStringAppend(&pfx, prefix, TCL_INDEX_NONE); TclDStringClear(&tmp); - Tcl_DStringAppend(&tmp, libraryPtr->prefix, -1); + Tcl_DStringAppend(&tmp, libraryPtr->prefix, TCL_INDEX_NONE); if (strcmp(Tcl_DStringValue(&tmp), Tcl_DStringValue(&pfx)) == 0) { namesMatch = 1; @@ -307,7 +307,7 @@ Tcl_LoadObjCmd( */ if (prefix != NULL) { - Tcl_DStringAppend(&pfx, prefix, -1); + Tcl_DStringAppend(&pfx, prefix, TCL_INDEX_NONE); } else { Tcl_Obj *splitPtr, *pkgGuessPtr; size_t pElements; @@ -487,7 +487,7 @@ Tcl_LoadObjCmd( * this interp are incompatible in their stubs mechanisms, and * recorded the error in the oldest legacy place we have to do so. */ - Tcl_SetObjResult(target, Tcl_NewStringObj(iPtr->legacyResult, -1)); + Tcl_SetObjResult(target, Tcl_NewStringObj(iPtr->legacyResult, TCL_INDEX_NONE)); iPtr->legacyResult = NULL; iPtr->legacyFreeProc = (void (*) (void))-1; } @@ -625,7 +625,7 @@ Tcl_UnloadObjCmd( } if ((fullFileName[0] == 0) && (prefix == NULL)) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "must specify either file name or prefix", -1)); + "must specify either file name or prefix", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "OPERATION", "UNLOAD", "NOLIBRARY", NULL); code = TCL_ERROR; @@ -665,9 +665,9 @@ Tcl_UnloadObjCmd( namesMatch = 0; } else { TclDStringClear(&pfx); - Tcl_DStringAppend(&pfx, prefix, -1); + Tcl_DStringAppend(&pfx, prefix, TCL_INDEX_NONE); TclDStringClear(&tmp); - Tcl_DStringAppend(&tmp, libraryPtr->prefix, -1); + Tcl_DStringAppend(&tmp, libraryPtr->prefix, TCL_INDEX_NONE); if (strcmp(Tcl_DStringValue(&tmp), Tcl_DStringValue(&pfx)) == 0) { namesMatch = 1; @@ -1121,8 +1121,8 @@ TclGetLoadedLibraries( Tcl_MutexLock(&libraryMutex); for (libraryPtr = firstLibraryPtr; libraryPtr != NULL; libraryPtr = libraryPtr->nextPtr) { - pkgDesc[0] = Tcl_NewStringObj(libraryPtr->fileName, -1); - pkgDesc[1] = Tcl_NewStringObj(libraryPtr->prefix, -1); + pkgDesc[0] = Tcl_NewStringObj(libraryPtr->fileName, TCL_INDEX_NONE); + pkgDesc[1] = Tcl_NewStringObj(libraryPtr->prefix, TCL_INDEX_NONE); Tcl_ListObjAppendElement(NULL, resultObj, Tcl_NewListObj(2, pkgDesc)); } @@ -1147,7 +1147,7 @@ TclGetLoadedLibraries( libraryPtr = ipPtr->libraryPtr; if (!strcmp(prefix, libraryPtr->prefix)) { - resultObj = Tcl_NewStringObj(libraryPtr->fileName, -1); + resultObj = Tcl_NewStringObj(libraryPtr->fileName, TCL_INDEX_NONE); break; } } @@ -1166,8 +1166,8 @@ TclGetLoadedLibraries( TclNewObj(resultObj); for (; ipPtr != NULL; ipPtr = ipPtr->nextPtr) { libraryPtr = ipPtr->libraryPtr; - pkgDesc[0] = Tcl_NewStringObj(libraryPtr->fileName, -1); - pkgDesc[1] = Tcl_NewStringObj(libraryPtr->prefix, -1); + pkgDesc[0] = Tcl_NewStringObj(libraryPtr->fileName, TCL_INDEX_NONE); + pkgDesc[1] = Tcl_NewStringObj(libraryPtr->prefix, TCL_INDEX_NONE); Tcl_ListObjAppendElement(NULL, resultObj, Tcl_NewListObj(2, pkgDesc)); } Tcl_SetObjResult(interp, resultObj); diff --git a/generic/tclLoadNone.c b/generic/tclLoadNone.c index f60f843..abf6eda 100644 --- a/generic/tclLoadNone.c +++ b/generic/tclLoadNone.c @@ -81,7 +81,7 @@ TclpLoadMemory( { if (interp) { Tcl_SetObjResult(interp, Tcl_NewStringObj("dynamic loading from memory " - "is not available on this system", -1)); + "is not available on this system", TCL_INDEX_NONE)); } return TCL_ERROR; } diff --git a/generic/tclOOCall.c b/generic/tclOOCall.c index 450fc9f..fcf7f2b 100644 --- a/generic/tclOOCall.c +++ b/generic/tclOOCall.c @@ -1848,7 +1848,7 @@ TclOORenderCallChain( ? Tcl_GetObjectName(interp, (Tcl_Object) miPtr->mPtr->declaringClassPtr->thisPtr) : objectLiteral; - descObjs[3] = Tcl_NewStringObj(miPtr->mPtr->typePtr->name, -1); + descObjs[3] = Tcl_NewStringObj(miPtr->mPtr->typePtr->name, TCL_INDEX_NONE); objv[i] = Tcl_NewListObj(4, descObjs); } diff --git a/generic/tclOOMethod.c b/generic/tclOOMethod.c index 70f9503..2ac21b8 100644 --- a/generic/tclOOMethod.c +++ b/generic/tclOOMethod.c @@ -387,7 +387,7 @@ TclOONewBasicMethod( /* Name of the method, whether it is public, * and the function to implement it. */ { - Tcl_Obj *namePtr = Tcl_NewStringObj(dcm->name, -1); + Tcl_Obj *namePtr = Tcl_NewStringObj(dcm->name, TCL_INDEX_NONE); Tcl_IncrRefCount(namePtr); TclNewMethod(interp, (Tcl_Class) clsPtr, namePtr, @@ -1410,7 +1410,7 @@ CloneProcedureMethod( TclNewObj(argObj); Tcl_ListObjAppendElement(NULL, argObj, - Tcl_NewStringObj(localPtr->name, -1)); + Tcl_NewStringObj(localPtr->name, TCL_INDEX_NONE)); if (localPtr->defValuePtr != NULL) { Tcl_ListObjAppendElement(NULL, argObj, localPtr->defValuePtr); } @@ -1481,7 +1481,7 @@ TclOONewForwardInstanceMethod( } if (prefixLen < 1) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "method forward prefix must be non-empty", -1)); + "method forward prefix must be non-empty", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "OO", "BAD_FORWARD", NULL); return NULL; } @@ -1520,7 +1520,7 @@ TclOONewForwardMethod( } if (prefixLen < 1) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "method forward prefix must be non-empty", -1)); + "method forward prefix must be non-empty", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "OO", "BAD_FORWARD", NULL); return NULL; } @@ -1707,7 +1707,7 @@ InitEnsembleRewrite( int *lengthPtr) /* Where to write the resulting length of the * array of rewritten arguments. */ { - unsigned len = rewriteLength + objc - toRewrite; + size_t len = rewriteLength + objc - toRewrite; Tcl_Obj **argObjs = (Tcl_Obj **)TclStackAlloc(interp, sizeof(Tcl_Obj *) * len); memcpy(argObjs, rewriteObjs, rewriteLength * sizeof(Tcl_Obj *)); diff --git a/generic/tclPathObj.c b/generic/tclPathObj.c index d0826b7..19c1b9d 100644 --- a/generic/tclPathObj.c +++ b/generic/tclPathObj.c @@ -751,7 +751,7 @@ GetExtension( if (extension == NULL) { TclNewObj(ret); } else { - ret = Tcl_NewStringObj(extension, -1); + ret = Tcl_NewStringObj(extension, TCL_INDEX_NONE); } Tcl_IncrRefCount(ret); return ret; diff --git a/generic/tclPkg.c b/generic/tclPkg.c index 34346f9..132a219 100644 --- a/generic/tclPkg.c +++ b/generic/tclPkg.c @@ -165,7 +165,7 @@ Tcl_PkgProvideEx( pkgPtr = FindPackage(interp, name); if (pkgPtr->version == NULL) { - pkgPtr->version = Tcl_NewStringObj(version, -1); + pkgPtr->version = Tcl_NewStringObj(version, TCL_INDEX_NONE); Tcl_IncrRefCount(pkgPtr->version); pkgPtr->clientData = clientData; return TCL_OK; @@ -291,7 +291,7 @@ TclPkgFileSeen( } else { list = (Tcl_Obj *)Tcl_GetHashValue(entry); } - Tcl_ListObjAppendElement(interp, list, Tcl_NewStringObj(fileName, -1)); + Tcl_ListObjAppendElement(interp, list, Tcl_NewStringObj(fileName, TCL_INDEX_NONE)); } } @@ -407,7 +407,7 @@ Tcl_PkgRequireEx( != CheckVersionAndConvert(interp, version, NULL, NULL)) { return NULL; } - ov = Tcl_NewStringObj(version, -1); + ov = Tcl_NewStringObj(version, TCL_INDEX_NONE); if (exact) { Tcl_AppendStringsToObj(ov, "-", version, NULL); } @@ -531,7 +531,7 @@ PkgRequireCoreStep1( */ Tcl_DStringInit(&command); - Tcl_DStringAppend(&command, script, -1); + Tcl_DStringAppend(&command, script, TCL_INDEX_NONE); Tcl_DStringAppendElement(&command, name); AddRequirementsToDString(&command, reqc, reqv); @@ -839,7 +839,7 @@ SelectPackage( Tcl_NRAddCallback(interp, SelectPackageFinal, reqPtr, INT2PTR(reqc), (void *)reqv, data[3]); - Tcl_NREvalObj(interp, Tcl_NewStringObj(bestPtr->script, -1), + Tcl_NREvalObj(interp, Tcl_NewStringObj(bestPtr->script, TCL_INDEX_NONE), TCL_EVAL_GLOBAL); } return TCL_OK; @@ -1200,7 +1200,7 @@ TclNRPackageObjCmd( if (objc == 4) { Tcl_Free(argv3i); Tcl_SetObjResult(interp, - Tcl_NewStringObj(availPtr->script, -1)); + Tcl_NewStringObj(availPtr->script, TCL_INDEX_NONE)); return TCL_OK; } Tcl_EventuallyFree(availPtr->script, TCL_DYNAMIC); @@ -1251,7 +1251,7 @@ TclNRPackageObjCmd( pkgPtr = (Package *)Tcl_GetHashValue(hPtr); if ((pkgPtr->version != NULL) || (pkgPtr->availPtr != NULL)) { Tcl_ListObjAppendElement(NULL,resultObj, Tcl_NewStringObj( - (char *)Tcl_GetHashKey(tablePtr, hPtr), -1)); + (char *)Tcl_GetHashKey(tablePtr, hPtr), TCL_INDEX_NONE)); } } Tcl_SetObjResult(interp, resultObj); @@ -1353,7 +1353,7 @@ TclNRPackageObjCmd( * Create a new-style requirement for the exact version. */ - ov = Tcl_NewStringObj(version, -1); + ov = Tcl_NewStringObj(version, TCL_INDEX_NONE); Tcl_AppendStringsToObj(ov, "-", version, NULL); version = NULL; argv3 = TclGetString(objv[3]); @@ -1404,7 +1404,7 @@ TclNRPackageObjCmd( if (objc == 2) { if (iPtr->packageUnknown != NULL) { Tcl_SetObjResult(interp, - Tcl_NewStringObj(iPtr->packageUnknown, -1)); + Tcl_NewStringObj(iPtr->packageUnknown, TCL_INDEX_NONE)); } } else if (objc == 3) { if (iPtr->packageUnknown != NULL) { @@ -1456,7 +1456,7 @@ TclNRPackageObjCmd( */ Tcl_SetObjResult(interp, - Tcl_NewStringObj(pkgPreferOptions[iPtr->packagePrefer], -1)); + Tcl_NewStringObj(pkgPreferOptions[iPtr->packagePrefer], TCL_INDEX_NONE)); break; } case PKG_VCOMPARE: @@ -1503,7 +1503,7 @@ TclNRPackageObjCmd( for (availPtr = pkgPtr->availPtr; availPtr != NULL; availPtr = availPtr->nextPtr) { Tcl_ListObjAppendElement(NULL, resultObj, - Tcl_NewStringObj(availPtr->version, -1)); + Tcl_NewStringObj(availPtr->version, TCL_INDEX_NONE)); } } Tcl_SetObjResult(interp, resultObj); diff --git a/generic/tclProc.c b/generic/tclProc.c index 01bc337..c8a304a 100644 --- a/generic/tclProc.c +++ b/generic/tclProc.c @@ -524,9 +524,9 @@ TclCreateProc( } if (fieldCount > 2) { Tcl_Obj *errorObj = Tcl_NewStringObj( - "too many fields in argument specifier \"", -1); + "too many fields in argument specifier \"", TCL_INDEX_NONE); Tcl_AppendObjToObj(errorObj, argArray[i]); - Tcl_AppendToObj(errorObj, "\"", -1); + Tcl_AppendToObj(errorObj, "\"", TCL_INDEX_NONE); Tcl_SetObjResult(interp, errorObj); Tcl_SetErrorCode(interp, "TCL", "OPERATION", "PROC", "FORMALARGUMENTFORMAT", NULL); @@ -534,7 +534,7 @@ TclCreateProc( } if ((fieldCount == 0) || (Tcl_GetCharLength(fieldValues[0]) == 0)) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "argument with no name", -1)); + "argument with no name", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "OPERATION", "PROC", "FORMALARGUMENTFORMAT", NULL); goto procError; @@ -560,9 +560,9 @@ TclCreateProc( } } else if (*argnamei == ':' && *(argnamei+1) == ':') { Tcl_Obj *errorObj = Tcl_NewStringObj( - "formal parameter \"", -1); + "formal parameter \"", TCL_INDEX_NONE); Tcl_AppendObjToObj(errorObj, fieldValues[0]); - Tcl_AppendToObj(errorObj, "\" is not a simple name", -1); + Tcl_AppendToObj(errorObj, "\" is not a simple name", TCL_INDEX_NONE); Tcl_SetObjResult(interp, errorObj); Tcl_SetErrorCode(interp, "TCL", "OPERATION", "PROC", "FORMALARGUMENTFORMAT", NULL); @@ -613,7 +613,7 @@ TclCreateProc( "procedure \"%s\": formal parameter \"", procName); Tcl_AppendObjToObj(errorObj, fieldValues[0]); Tcl_AppendToObj(errorObj, "\" has " - "default value inconsistent with precompiled body", -1); + "default value inconsistent with precompiled body", TCL_INDEX_NONE); Tcl_SetObjResult(interp, errorObj); Tcl_SetErrorCode(interp, "TCL", "OPERATION", "PROC", "BYTECODELIES", NULL); @@ -1080,7 +1080,7 @@ ProcWrongNumArgs( sizeof(Tcl_Obj *) * (numArgs+1)); if (framePtr->isProcCallFrame & FRAME_IS_LAMBDA) { - desiredObjs[0] = Tcl_NewStringObj("lambdaExpr", -1); + desiredObjs[0] = Tcl_NewStringObj("lambdaExpr", TCL_INDEX_NONE); } else { desiredObjs[0] = framePtr->objv[skip-1]; } @@ -1941,7 +1941,7 @@ TclProcCompileProc( if (codePtr->flags & TCL_BYTECODE_PRECOMPILED) { if ((Interp *) *codePtr->interpHandle != iPtr) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "a precompiled script jumped interps", -1)); + "a precompiled script jumped interps", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "OPERATION", "PROC", "CROSSINTERPBYTECODE", NULL); return TCL_ERROR; @@ -1969,7 +1969,7 @@ TclProcCompileProc( TclNewLiteralStringObj(message, "Compiling "); Tcl_IncrRefCount(message); Tcl_AppendStringsToObj(message, description, " \"", NULL); - Tcl_AppendLimitedToObj(message, procName, -1, 50, NULL); + Tcl_AppendLimitedToObj(message, procName, TCL_INDEX_NONE, 50, NULL); fprintf(stdout, "%s\"\n", TclGetString(message)); Tcl_DecrRefCount(message); } diff --git a/generic/tclRegexp.c b/generic/tclRegexp.c index 4e3c6c5..07beffd 100644 --- a/generic/tclRegexp.c +++ b/generic/tclRegexp.c @@ -221,9 +221,9 @@ Tcl_RegExpExec( */ Tcl_DStringInit(&ds); - ustr = Tcl_UtfToUniCharDString(text, -1, &ds); + ustr = Tcl_UtfToUniCharDString(text, TCL_INDEX_NONE, &ds); numChars = Tcl_DStringLength(&ds) / sizeof(Tcl_UniChar); - result = RegExpExecUniChar(interp, re, ustr, numChars, -1 /* nmatches */, + result = RegExpExecUniChar(interp, re, ustr, numChars, TCL_INDEX_NONE /* nmatches */, flags); Tcl_DStringFree(&ds); @@ -689,7 +689,7 @@ TclRegAbout( for (inf=infonames ; inf->bit != 0 ; inf++) { if (regexpPtr->re.re_info & inf->bit) { Tcl_ListObjAppendElement(NULL, infoObj, - Tcl_NewStringObj(inf->text, -1)); + Tcl_NewStringObj(inf->text, TCL_INDEX_NONE)); } } Tcl_ListObjAppendElement(NULL, resultObj, infoObj); diff --git a/generic/tclResult.c b/generic/tclResult.c index c0266bc..6a36fdf 100644 --- a/generic/tclResult.c +++ b/generic/tclResult.c @@ -317,7 +317,7 @@ Tcl_AppendResult( if (bytes == NULL) { break; } - Tcl_AppendToObj(objPtr, bytes, -1); + Tcl_AppendToObj(objPtr, bytes, TCL_INDEX_NONE); } Tcl_SetObjResult(interp, objPtr); va_end(argList); @@ -354,7 +354,7 @@ Tcl_AppendElement( * to result. */ { Interp *iPtr = (Interp *) interp; - Tcl_Obj *elementPtr = Tcl_NewStringObj(element, -1); + Tcl_Obj *elementPtr = Tcl_NewStringObj(element, TCL_INDEX_NONE); Tcl_Obj *listPtr = Tcl_NewListObj(1, &elementPtr); const char *bytes; size_t length; @@ -511,7 +511,7 @@ Tcl_SetErrorCode( if (elem == NULL) { break; } - Tcl_ListObjAppendElement(NULL, errorObj, Tcl_NewStringObj(elem, -1)); + Tcl_ListObjAppendElement(NULL, errorObj, Tcl_NewStringObj(elem, TCL_INDEX_NONE)); } Tcl_SetObjErrorCode(interp, errorObj); va_end(argList); diff --git a/generic/tclStrToD.c b/generic/tclStrToD.c index 597fe77..2f29617 100644 --- a/generic/tclStrToD.c +++ b/generic/tclStrToD.c @@ -1523,7 +1523,7 @@ TclParseNumber( expected); Tcl_AppendLimitedToObj(msg, bytes, numBytes, 50, ""); - Tcl_AppendToObj(msg, "\"", -1); + Tcl_AppendToObj(msg, "\"", TCL_INDEX_NONE); Tcl_SetObjResult(interp, msg); Tcl_SetErrorCode(interp, "TCL", "VALUE", "NUMBER", NULL); } @@ -4787,7 +4787,7 @@ Tcl_InitBignumFromDouble( if (interp != NULL) { const char *s = "integer value too large to represent"; - Tcl_SetObjResult(interp, Tcl_NewStringObj(s, -1)); + Tcl_SetObjResult(interp, Tcl_NewStringObj(s, TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "ARITH", "IOVERFLOW", s, NULL); } return TCL_ERROR; diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c index e1376f4..0acc6e2 100644 --- a/generic/tclStringObj.c +++ b/generic/tclStringObj.c @@ -1683,7 +1683,7 @@ AppendUtfToUnicodeRep( return; } - ExtendUnicodeRepWithString(objPtr, bytes, numBytes, -1); + ExtendUnicodeRepWithString(objPtr, bytes, numBytes, TCL_INDEX_NONE); TclInvalidateStringRep(objPtr); stringPtr = GET_STRING(objPtr); stringPtr->allocated = 0; @@ -1812,7 +1812,7 @@ Tcl_AppendStringsToObj( if (bytes == NULL) { break; } - Tcl_AppendToObj(objPtr, bytes, -1); + Tcl_AppendToObj(objPtr, bytes, TCL_INDEX_NONE); } va_end(argList); } @@ -2588,7 +2588,7 @@ Tcl_AppendFormatToObj( errorMsg: if (interp != NULL) { - Tcl_SetObjResult(interp, Tcl_NewStringObj(msg, -1)); + Tcl_SetObjResult(interp, Tcl_NewStringObj(msg, TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "FORMAT", errCode, NULL); } error: diff --git a/generic/tclStubInit.c b/generic/tclStubInit.c index 1186aa3..dbd8b52 100644 --- a/generic/tclStubInit.c +++ b/generic/tclStubInit.c @@ -337,7 +337,7 @@ static int exprInt(Tcl_Interp *interp, const char *expr, int *ptr){ *ptr = (int)longValue; } else { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "integer value too large to represent", -1)); + "integer value too large to represent", TCL_INDEX_NONE)); result = TCL_ERROR; } } @@ -353,7 +353,7 @@ static int exprIntObj(Tcl_Interp *interp, Tcl_Obj*expr, int *ptr){ *ptr = (int)longValue; } else { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "integer value too large to represent", -1)); + "integer value too large to represent", TCL_INDEX_NONE)); result = TCL_ERROR; } } diff --git a/generic/tclTestObj.c b/generic/tclTestObj.c index 131601d..3bf6989 100644 --- a/generic/tclTestObj.c +++ b/generic/tclTestObj.c @@ -180,13 +180,13 @@ TestbignumobjCmd( string = Tcl_GetString(objv[3]); if (mp_init(&bignumValue) != MP_OKAY) { Tcl_SetObjResult(interp, - Tcl_NewStringObj("error in mp_init", -1)); + Tcl_NewStringObj("error in mp_init", TCL_INDEX_NONE)); return TCL_ERROR; } if (mp_read_radix(&bignumValue, string, 10) != MP_OKAY) { mp_clear(&bignumValue); Tcl_SetObjResult(interp, - Tcl_NewStringObj("error in mp_read_radix", -1)); + Tcl_NewStringObj("error in mp_read_radix", TCL_INDEX_NONE)); return TCL_ERROR; } @@ -230,7 +230,7 @@ TestbignumobjCmd( if (mp_mul_d(&bignumValue, 10, &bignumValue) != MP_OKAY) { mp_clear(&bignumValue); Tcl_SetObjResult(interp, - Tcl_NewStringObj("error in mp_mul_d", -1)); + Tcl_NewStringObj("error in mp_mul_d", TCL_INDEX_NONE)); return TCL_ERROR; } if (!Tcl_IsShared(varPtr[varIndex])) { @@ -255,7 +255,7 @@ TestbignumobjCmd( if (mp_div_d(&bignumValue, 10, &bignumValue, NULL) != MP_OKAY) { mp_clear(&bignumValue); Tcl_SetObjResult(interp, - Tcl_NewStringObj("error in mp_div_d", -1)); + Tcl_NewStringObj("error in mp_div_d", TCL_INDEX_NONE)); return TCL_ERROR; } if (!Tcl_IsShared(varPtr[varIndex])) { @@ -280,7 +280,7 @@ TestbignumobjCmd( if (mp_mod_2d(&bignumValue, 1, &bignumValue) != MP_OKAY) { mp_clear(&bignumValue); Tcl_SetObjResult(interp, - Tcl_NewStringObj("error in mp_mod_2d", -1)); + Tcl_NewStringObj("error in mp_mod_2d", TCL_INDEX_NONE)); return TCL_ERROR; } if (!Tcl_IsShared(varPtr[varIndex])) { @@ -598,7 +598,7 @@ TestindexobjCmd( } if (objc < 5) { - Tcl_AppendToObj(Tcl_GetObjResult(interp), "wrong # args", -1); + Tcl_AppendToObj(Tcl_GetObjResult(interp), "wrong # args", TCL_INDEX_NONE); return TCL_ERROR; } @@ -738,7 +738,7 @@ TestintobjCmd( return TCL_ERROR; } Tcl_AppendToObj(Tcl_GetObjResult(interp), - ((wideValue == WIDE_MAX)? "1" : "0"), -1); + ((wideValue == WIDE_MAX)? "1" : "0"), TCL_INDEX_NONE); } else if (strcmp(subCmd, "get") == 0) { if (objc != 3) { goto wrongNumArgs; @@ -754,7 +754,7 @@ TestintobjCmd( if (CheckIfVarUnset(interp, varPtr,varIndex)) { return TCL_ERROR; } - Tcl_AppendToObj(Tcl_GetObjResult(interp), Tcl_GetString(varPtr[varIndex]), -1); + Tcl_AppendToObj(Tcl_GetObjResult(interp), Tcl_GetString(varPtr[varIndex]), TCL_INDEX_NONE); } else if (strcmp(subCmd, "inttoobigtest") == 0) { /* * If long ints have more bits than ints on this platform, verify that @@ -767,7 +767,7 @@ TestintobjCmd( goto wrongNumArgs; } #if (INT_MAX == LONG_MAX) /* int is same size as long int */ - Tcl_AppendToObj(Tcl_GetObjResult(interp), "1", -1); + Tcl_AppendToObj(Tcl_GetObjResult(interp), "1", TCL_INDEX_NONE); #else if ((varPtr[varIndex] != NULL) && !Tcl_IsShared(varPtr[varIndex])) { Tcl_SetWideIntObj(varPtr[varIndex], LONG_MAX); @@ -776,10 +776,10 @@ TestintobjCmd( } if (Tcl_GetIntFromObj(interp, varPtr[varIndex], &i) != TCL_OK) { Tcl_ResetResult(interp); - Tcl_AppendToObj(Tcl_GetObjResult(interp), "1", -1); + Tcl_AppendToObj(Tcl_GetObjResult(interp), "1", TCL_INDEX_NONE); return TCL_OK; } - Tcl_AppendToObj(Tcl_GetObjResult(interp), "0", -1); + Tcl_AppendToObj(Tcl_GetObjResult(interp), "0", TCL_INDEX_NONE); #endif } else if (strcmp(subCmd, "mult10") == 0) { if (objc != 3) { @@ -1104,7 +1104,7 @@ TestobjCmd( const char *typeName; if (objv[2]->typePtr == NULL) { - Tcl_SetObjResult(interp, Tcl_NewStringObj("none", -1)); + Tcl_SetObjResult(interp, Tcl_NewStringObj("none", TCL_INDEX_NONE)); } else { typeName = objv[2]->typePtr->name; @@ -1113,7 +1113,7 @@ TestobjCmd( #ifndef TCL_WIDE_INT_IS_LONG else if (!strcmp(typeName, "wideInt")) typeName = "int"; #endif - Tcl_SetObjResult(interp, Tcl_NewStringObj(typeName, -1)); + Tcl_SetObjResult(interp, Tcl_NewStringObj(typeName, TCL_INDEX_NONE)); } } return TCL_OK; @@ -1207,15 +1207,15 @@ TestobjCmd( goto wrongNumArgs; } if (varPtr[varIndex]->typePtr == NULL) { /* a string! */ - Tcl_AppendToObj(Tcl_GetObjResult(interp), "string", -1); + Tcl_AppendToObj(Tcl_GetObjResult(interp), "string", TCL_INDEX_NONE); #ifndef TCL_WIDE_INT_IS_LONG } else if (!strcmp(varPtr[varIndex]->typePtr->name, "wideInt")) { Tcl_AppendToObj(Tcl_GetObjResult(interp), - "int", -1); + "int", TCL_INDEX_NONE); #endif } else { Tcl_AppendToObj(Tcl_GetObjResult(interp), - varPtr[varIndex]->typePtr->name, -1); + varPtr[varIndex]->typePtr->name, TCL_INDEX_NONE); } break; default: @@ -1346,7 +1346,7 @@ TeststringobjCmd( if (CheckIfVarUnset(interp, varPtr, varIndex)) { return TCL_ERROR; } - Tcl_AppendToObj(Tcl_GetObjResult(interp), Tcl_GetString(varPtr[varIndex]), -1); + Tcl_AppendToObj(Tcl_GetObjResult(interp), Tcl_GetString(varPtr[varIndex]), TCL_INDEX_NONE); break; case 4: /* length */ if (objc != 3) { @@ -1459,7 +1459,7 @@ TeststringobjCmd( } if ((length < 0) || ((Tcl_WideUInt)length > (Tcl_WideUInt)size)) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "index value out of range", -1)); + "index value out of range", TCL_INDEX_NONE)); return TCL_ERROR; } @@ -1490,7 +1490,7 @@ TeststringobjCmd( } if ((length < 0) || ((Tcl_WideUInt)length > (Tcl_WideUInt)size)) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "index value out of range", -1)); + "index value out of range", TCL_INDEX_NONE)); return TCL_ERROR; } @@ -1567,7 +1567,7 @@ GetVariableIndex( } if (index < 0 || index >= NUMBER_OF_OBJECT_VARS) { Tcl_ResetResult(interp); - Tcl_AppendToObj(Tcl_GetObjResult(interp), "bad variable index", -1); + Tcl_AppendToObj(Tcl_GetObjResult(interp), "bad variable index", TCL_INDEX_NONE); return TCL_ERROR; } @@ -1604,7 +1604,7 @@ CheckIfVarUnset( sprintf(buf, "variable %" TCL_Z_MODIFIER "u is unset (NULL)", varIndex); Tcl_ResetResult(interp); - Tcl_AppendToObj(Tcl_GetObjResult(interp), buf, -1); + Tcl_AppendToObj(Tcl_GetObjResult(interp), buf, TCL_INDEX_NONE); return 1; } return 0; diff --git a/generic/tclTestProcBodyObj.c b/generic/tclTestProcBodyObj.c index 6d5e6ec..8d92c6e 100644 --- a/generic/tclTestProcBodyObj.c +++ b/generic/tclTestProcBodyObj.c @@ -146,7 +146,7 @@ RegisterCommand( if (cmdTablePtr->exportIt) { sprintf(buf, "namespace eval %s { namespace export %s }", namesp, cmdTablePtr->cmdName); - if (Tcl_EvalEx(interp, buf, -1, 0) != TCL_OK) { + if (Tcl_EvalEx(interp, buf, TCL_INDEX_NONE, 0) != TCL_OK) { return TCL_ERROR; } } diff --git a/generic/tclThreadTest.c b/generic/tclThreadTest.c index 6f37124..5781329 100644 --- a/generic/tclThreadTest.c +++ b/generic/tclThreadTest.c @@ -607,7 +607,7 @@ NewTestThread( */ Tcl_Preserve(tsdPtr->interp); - result = Tcl_EvalEx(tsdPtr->interp, threadEvalScript, -1, 0); + result = Tcl_EvalEx(tsdPtr->interp, threadEvalScript, TCL_INDEX_NONE, 0); if (result != TCL_OK) { ThreadErrorProc(tsdPtr->interp); } @@ -654,10 +654,10 @@ ThreadErrorProc( errorInfo = Tcl_GetVar2(interp, "errorInfo", NULL, TCL_GLOBAL_ONLY); if (errorProcString == NULL) { errChannel = Tcl_GetStdChannel(TCL_STDERR); - Tcl_WriteChars(errChannel, "Error from thread ", -1); - Tcl_WriteChars(errChannel, buf, -1); + Tcl_WriteChars(errChannel, "Error from thread ", TCL_INDEX_NONE); + Tcl_WriteChars(errChannel, buf, TCL_INDEX_NONE); Tcl_WriteChars(errChannel, "\n", 1); - Tcl_WriteChars(errChannel, errorInfo, -1); + Tcl_WriteChars(errChannel, errorInfo, TCL_INDEX_NONE); Tcl_WriteChars(errChannel, "\n", 1); } else { argv[0] = errorProcString; @@ -982,7 +982,7 @@ ThreadCancel( Tcl_MutexUnlock(&threadMutex); Tcl_ResetResult(interp); return Tcl_CancelEval(tsdPtr->interp, - (result != NULL) ? Tcl_NewStringObj(result, -1) : NULL, 0, flags); + (result != NULL) ? Tcl_NewStringObj(result, TCL_INDEX_NONE) : NULL, 0, flags); } /* diff --git a/generic/tclTimer.c b/generic/tclTimer.c index d49c5c8..3b4741e 100644 --- a/generic/tclTimer.c +++ b/generic/tclTimer.c @@ -21,7 +21,7 @@ typedef struct TimerHandler { Tcl_Time time; /* When timer is to fire. */ Tcl_TimerProc *proc; /* Function to call. */ - ClientData clientData; /* Argument to pass to proc. */ + void *clientData; /* Argument to pass to proc. */ Tcl_TimerToken token; /* Identifies handler so it can be deleted. */ struct TimerHandler *nextPtr; /* Next event in queue, or NULL for end of @@ -73,7 +73,7 @@ typedef struct AfterAssocData { typedef struct IdleHandler { Tcl_IdleProc *proc; /* Function to call. */ - ClientData clientData; /* Value to pass to proc. */ + void *clientData; /* Value to pass to proc. */ int generation; /* Used to distinguish older handlers from * recently-created ones. */ struct IdleHandler *nextPtr;/* Next in list of active handlers. */ @@ -150,18 +150,18 @@ static Tcl_ThreadDataKey dataKey; * Prototypes for functions referenced only in this file: */ -static void AfterCleanupProc(ClientData clientData, +static void AfterCleanupProc(void *clientData, Tcl_Interp *interp); static int AfterDelay(Tcl_Interp *interp, Tcl_WideInt ms); -static void AfterProc(ClientData clientData); +static void AfterProc(void *clientData); static void FreeAfterPtr(AfterInfo *afterPtr); static AfterInfo * GetAfterEvent(AfterAssocData *assocPtr, Tcl_Obj *commandPtr); static ThreadSpecificData *InitTimer(void); -static void TimerExitProc(ClientData clientData); +static void TimerExitProc(void *clientData); static int TimerHandlerEventProc(Tcl_Event *evPtr, int flags); -static void TimerCheckProc(ClientData clientData, int flags); -static void TimerSetupProc(ClientData clientData, int flags); +static void TimerCheckProc(void *clientData, int flags); +static void TimerSetupProc(void *clientData, int flags); /* *---------------------------------------------------------------------- @@ -251,7 +251,7 @@ Tcl_CreateTimerHandler( int milliseconds, /* How many milliseconds to wait before * invoking proc. */ Tcl_TimerProc *proc, /* Function to invoke. */ - ClientData clientData) /* Arbitrary data to pass to proc. */ + void *clientData) /* Arbitrary data to pass to proc. */ { Tcl_Time time; @@ -292,7 +292,7 @@ Tcl_TimerToken TclCreateAbsoluteTimerHandler( Tcl_Time *timePtr, Tcl_TimerProc *proc, - ClientData clientData) + void *clientData) { TimerHandler *timerHandlerPtr, *tPtr2, *prevPtr; ThreadSpecificData *tsdPtr = InitTimer(); @@ -619,7 +619,7 @@ TimerHandlerEventProc( void Tcl_DoWhenIdle( Tcl_IdleProc *proc, /* Function to invoke. */ - ClientData clientData) /* Arbitrary value to pass to proc. */ + void *clientData) /* Arbitrary value to pass to proc. */ { IdleHandler *idlePtr; Tcl_Time blockTime; @@ -663,7 +663,7 @@ Tcl_DoWhenIdle( void Tcl_CancelIdleCall( Tcl_IdleProc *proc, /* Function that was previously registered. */ - ClientData clientData) /* Arbitrary value to pass to proc. */ + void *clientData) /* Arbitrary value to pass to proc. */ { IdleHandler *idlePtr, *prevPtr; IdleHandler *nextPtr; @@ -974,7 +974,7 @@ Tcl_AfterObjCmd( Tcl_ListObjAppendElement(interp, resultListPtr, afterPtr->commandPtr); Tcl_ListObjAppendElement(interp, resultListPtr, Tcl_NewStringObj( - (afterPtr->token == NULL) ? "idle" : "timer", -1)); + (afterPtr->token == NULL) ? "idle" : "timer", TCL_INDEX_NONE)); Tcl_SetObjResult(interp, resultListPtr); } break; @@ -1149,7 +1149,7 @@ GetAfterEvent( static void AfterProc( - ClientData clientData) /* Describes command to execute. */ + void *clientData) /* Describes command to execute. */ { AfterInfo *afterPtr = (AfterInfo *)clientData; AfterAssocData *assocPtr = afterPtr->assocPtr; @@ -1251,7 +1251,7 @@ FreeAfterPtr( static void AfterCleanupProc( - ClientData clientData, /* Points to AfterAssocData for the + void *clientData, /* Points to AfterAssocData for the * interpreter. */ TCL_UNUSED(Tcl_Interp *)) { diff --git a/generic/tclZipfs.c b/generic/tclZipfs.c index f284704..1653dbe 100644 --- a/generic/tclZipfs.c +++ b/generic/tclZipfs.c @@ -42,14 +42,14 @@ #define ZIPFS_ERROR(interp,errstr) \ do { \ if (interp) { \ - Tcl_SetObjResult(interp, Tcl_NewStringObj(errstr, -1)); \ + Tcl_SetObjResult(interp, Tcl_NewStringObj(errstr, TCL_INDEX_NONE)); \ } \ } while (0) #define ZIPFS_MEM_ERROR(interp) \ do { \ if (interp) { \ Tcl_SetObjResult(interp, Tcl_NewStringObj( \ - "out of memory", -1)); \ + "out of memory", TCL_INDEX_NONE)); \ Tcl_SetErrorCode(interp, "TCL", "MALLOC", NULL); \ } \ } while (0) @@ -1708,8 +1708,8 @@ ZipFSCatalogFilesystem( Tcl_DString ds2; Tcl_DStringInit(&ds2); - Tcl_DStringAppend(&ds2, "assets/.root/", -1); - Tcl_DStringAppend(&ds2, path, -1); + Tcl_DStringAppend(&ds2, "assets/.root/", TCL_INDEX_NONE); + Tcl_DStringAppend(&ds2, path, TCL_INDEX_NONE); if (ZipFSLookup(Tcl_DStringValue(&ds2))) { /* should not happen but skip it anyway */ Tcl_DStringFree(&ds2); @@ -1785,7 +1785,7 @@ ZipFSCatalogFilesystem( Tcl_DStringSetLength(&ds, strlen(z->name) + 8); Tcl_DStringSetLength(&ds, 0); - Tcl_DStringAppend(&ds, z->name, -1); + Tcl_DStringAppend(&ds, z->name, TCL_INDEX_NONE); dir = Tcl_DStringValue(&ds); for (endPtr = strrchr(dir, '/'); endPtr && (endPtr != dir); endPtr = strrchr(dir, '/')) { @@ -1907,9 +1907,9 @@ ListMountPoints( hPtr = Tcl_NextHashEntry(&search)) { zf = (ZipFile *) Tcl_GetHashValue(hPtr); Tcl_ListObjAppendElement(NULL, resultList, Tcl_NewStringObj( - zf->mountPoint, -1)); + zf->mountPoint, TCL_INDEX_NONE)); Tcl_ListObjAppendElement(NULL, resultList, Tcl_NewStringObj( - zf->name, -1)); + zf->name, TCL_INDEX_NONE)); } Tcl_SetObjResult(interp, resultList); return TCL_OK; @@ -1943,7 +1943,7 @@ DescribeMounted( ZipFile *zf = ZipFSLookupZip(mountPoint); if (zf) { - Tcl_SetObjResult(interp, Tcl_NewStringObj(zf->name, -1)); + Tcl_SetObjResult(interp, Tcl_NewStringObj(zf->name, TCL_INDEX_NONE)); return TCL_OK; } } @@ -2237,7 +2237,7 @@ ZipFSMountObjCmd( zipFileObj = Tcl_FSGetNormalizedPath(interp, objv[2]); if (!zipFileObj) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "could not normalize zip filename", -1)); + "could not normalize zip filename", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "OPERATION", "NORMALIZE", NULL); return TCL_ERROR; } @@ -2333,7 +2333,7 @@ ZipFSRootObjCmd( TCL_UNUSED(int) /*objc*/, TCL_UNUSED(Tcl_Obj *const *)) /*objv*/ { - Tcl_SetObjResult(interp, Tcl_NewStringObj(ZIPFS_VOLUME, -1)); + Tcl_SetObjResult(interp, Tcl_NewStringObj(ZIPFS_VOLUME, TCL_INDEX_NONE)); return TCL_OK; } @@ -2451,7 +2451,7 @@ RandomChar( double r; Tcl_Obj *ret; - if (Tcl_EvalEx(interp, "::tcl::mathfunc::rand", -1, 0) != TCL_OK) { + if (Tcl_EvalEx(interp, "::tcl::mathfunc::rand", TCL_INDEX_NONE, 0) != TCL_OK) { goto failed; } ret = Tcl_GetObjResult(interp); @@ -2540,7 +2540,7 @@ ZipAddFile( * crazy enough to embed NULs in filenames, they deserve what they get! */ - zpathExt = Tcl_UtfToExternalDString(ZipFS.utf8, zpathTcl, -1, &zpathDs); + zpathExt = Tcl_UtfToExternalDString(ZipFS.utf8, zpathTcl, TCL_INDEX_NONE, &zpathDs); zpathlen = strlen(zpathExt); if (zpathlen + ZIP_CENTRAL_HEADER_LEN > bufsize) { Tcl_SetObjResult(interp, Tcl_ObjPrintf( @@ -2880,7 +2880,7 @@ ZipFSFind( Tcl_Obj *cmd[2]; int result; - cmd[0] = Tcl_NewStringObj("::tcl::zipfs::find", -1); + cmd[0] = Tcl_NewStringObj("::tcl::zipfs::find", TCL_INDEX_NONE); cmd[1] = dirRoot; Tcl_IncrRefCount(cmd[0]); result = Tcl_EvalObjv(interp, 2, cmd, 0); @@ -3208,7 +3208,7 @@ ZipFSMkZipOrImg( } z = (ZipEntry *) Tcl_GetHashValue(hPtr); - name = Tcl_UtfToExternalDString(ZipFS.utf8, z->name, -1, &ds); + name = Tcl_UtfToExternalDString(ZipFS.utf8, z->name, TCL_INDEX_NONE, &ds); len = Tcl_DStringLength(&ds); SerializeCentralDirectoryEntry(start, end, (unsigned char *) buf, z, len); @@ -3628,7 +3628,7 @@ ZipFSCanonicalObjCmd( filename = TclGetString(objv[2]); result = CanonicalPath(mntpoint, filename, &dPath, zipfs); } - Tcl_SetObjResult(interp, Tcl_NewStringObj(result, -1)); + Tcl_SetObjResult(interp, Tcl_NewStringObj(result, TCL_INDEX_NONE)); return TCL_OK; } @@ -3673,7 +3673,7 @@ ZipFSExistsObjCmd( filename = TclGetString(objv[1]); Tcl_DStringInit(&ds); Tcl_DStringAppend(&ds, ZIPFS_VOLUME, ZIPFS_VOLUME_LEN - 1); - Tcl_DStringAppend(&ds, filename, -1); + Tcl_DStringAppend(&ds, filename, TCL_INDEX_NONE); filename = Tcl_DStringValue(&ds); ReadLock(); @@ -3724,7 +3724,7 @@ ZipFSInfoObjCmd( Tcl_Obj *result = Tcl_GetObjResult(interp); Tcl_ListObjAppendElement(interp, result, - Tcl_NewStringObj(z->zipFilePtr->name, -1)); + Tcl_NewStringObj(z->zipFilePtr->name, TCL_INDEX_NONE)); Tcl_ListObjAppendElement(interp, result, Tcl_NewWideIntObj(z->numBytes)); Tcl_ListObjAppendElement(interp, result, @@ -3810,7 +3810,7 @@ ZipFSListObjCmd( if (Tcl_StringMatch(z->name, pattern)) { Tcl_ListObjAppendElement(interp, result, - Tcl_NewStringObj(z->name, -1)); + Tcl_NewStringObj(z->name, TCL_INDEX_NONE)); } } } else if (regexp) { @@ -3820,7 +3820,7 @@ ZipFSListObjCmd( if (Tcl_RegExpExec(interp, regexp, z->name, z->name)) { Tcl_ListObjAppendElement(interp, result, - Tcl_NewStringObj(z->name, -1)); + Tcl_NewStringObj(z->name, TCL_INDEX_NONE)); } } } else { @@ -3829,7 +3829,7 @@ ZipFSListObjCmd( ZipEntry *z = (ZipEntry *) Tcl_GetHashValue(hPtr); Tcl_ListObjAppendElement(interp, result, - Tcl_NewStringObj(z->name, -1)); + Tcl_NewStringObj(z->name, TCL_INDEX_NONE)); } } Unlock(); @@ -3873,7 +3873,7 @@ TclZipfs_TclLibrary(void) */ if (zipfs_literal_tcl_library) { - return Tcl_NewStringObj(zipfs_literal_tcl_library, -1); + return Tcl_NewStringObj(zipfs_literal_tcl_library, TCL_INDEX_NONE); } /* @@ -3887,7 +3887,7 @@ TclZipfs_TclLibrary(void) Tcl_DecrRefCount(vfsInitScript); if (found == TCL_OK) { zipfs_literal_tcl_library = ZIPFS_APP_MOUNT "/tcl_library"; - return Tcl_NewStringObj(zipfs_literal_tcl_library, -1); + return Tcl_NewStringObj(zipfs_literal_tcl_library, TCL_INDEX_NONE); } /* @@ -3906,17 +3906,17 @@ TclZipfs_TclLibrary(void) #endif if (ZipfsAppHookFindTclInit(dllName) == TCL_OK) { - return Tcl_NewStringObj(zipfs_literal_tcl_library, -1); + return Tcl_NewStringObj(zipfs_literal_tcl_library, TCL_INDEX_NONE); } #elif !defined(NO_DLFCN_H) Dl_info dlinfo; if (dladdr((const void *)TclZipfs_TclLibrary, &dlinfo) && (dlinfo.dli_fname != NULL) && (ZipfsAppHookFindTclInit(dlinfo.dli_fname) == TCL_OK)) { - return Tcl_NewStringObj(zipfs_literal_tcl_library, -1); + return Tcl_NewStringObj(zipfs_literal_tcl_library, TCL_INDEX_NONE); } #else if (ZipfsAppHookFindTclInit(CFG_RUNTIME_LIBDIR "/" CFG_RUNTIME_DLLFILE) == TCL_OK) { - return Tcl_NewStringObj(zipfs_literal_tcl_library, -1); + return Tcl_NewStringObj(zipfs_literal_tcl_library, TCL_INDEX_NONE); } #endif /* _WIN32 */ #endif /* !defined(STATIC_BUILD) */ @@ -3927,7 +3927,7 @@ TclZipfs_TclLibrary(void) */ if (zipfs_literal_tcl_library) { - return Tcl_NewStringObj(zipfs_literal_tcl_library, -1); + return Tcl_NewStringObj(zipfs_literal_tcl_library, TCL_INDEX_NONE); } return NULL; } @@ -4936,7 +4936,7 @@ static Tcl_Obj * ZipFSFilesystemSeparatorProc( TCL_UNUSED(Tcl_Obj *) /*pathPtr*/) { - return Tcl_NewStringObj("/", -1); + return Tcl_NewStringObj("/", TCL_INDEX_NONE); } /* @@ -4956,11 +4956,11 @@ AppendWithPrefix( Tcl_DString *prefix, /* The prefix to add to the element, or NULL * for don't do that. */ const char *name, /* The name to append. */ - int nameLen) /* The length of the name. May be -1 for + size_t nameLen) /* The length of the name. May be TCL_INDEX_NONE for * append-up-to-NUL-byte. */ { if (prefix) { - int prefixLength = Tcl_DStringLength(prefix); + size_t prefixLength = Tcl_DStringLength(prefix); Tcl_DStringAppend(prefix, name, nameLen); Tcl_ListObjAppendElement(NULL, result, Tcl_NewStringObj( @@ -5063,7 +5063,7 @@ ZipFSMatchInDirectoryProc( if (z && ((dirOnly < 0) || (!dirOnly && !z->isDirectory) || (dirOnly && z->isDirectory))) { - AppendWithPrefix(result, prefixBuf, z->name, -1); + AppendWithPrefix(result, prefixBuf, z->name, TCL_INDEX_NONE); } goto end; } @@ -5096,7 +5096,7 @@ ZipFSMatchInDirectoryProc( continue; } if ((z->depth == scnt) && Tcl_StringCaseMatch(z->name, pat, 0)) { - AppendWithPrefix(result, prefixBuf, z->name + strip, -1); + AppendWithPrefix(result, prefixBuf, z->name + strip, TCL_INDEX_NONE); } } Tcl_Free(pat); @@ -5286,7 +5286,7 @@ ZipFSPathInFilesystemProc( static Tcl_Obj * ZipFSListVolumesProc(void) { - return Tcl_NewStringObj(ZIPFS_VOLUME, -1); + return Tcl_NewStringObj(ZIPFS_VOLUME, TCL_INDEX_NONE); } /* @@ -5400,10 +5400,10 @@ ZipFSFileAttrsGetProc( z->zipFilePtr->mountPointLen); break; case ZIP_ATTR_ARCHIVE: - *objPtrRef = Tcl_NewStringObj(z->zipFilePtr->name, -1); + *objPtrRef = Tcl_NewStringObj(z->zipFilePtr->name, TCL_INDEX_NONE); break; case ZIP_ATTR_PERMISSIONS: - *objPtrRef = Tcl_NewStringObj("0o555", -1); + *objPtrRef = Tcl_NewStringObj("0o555", TCL_INDEX_NONE); break; case ZIP_ATTR_CRC: TclNewIntObj(*objPtrRef, z->crc32); @@ -5464,7 +5464,7 @@ static Tcl_Obj * ZipFSFilesystemPathTypeProc( TCL_UNUSED(Tcl_Obj *) /*pathPtr*/) { - return Tcl_NewStringObj("zip", -1); + return Tcl_NewStringObj("zip", TCL_INDEX_NONE); } /* @@ -5661,7 +5661,7 @@ TclZipfs_Init( Tcl_Command ensemble; Tcl_Obj *mapObj; - Tcl_EvalEx(interp, findproc, -1, TCL_EVAL_GLOBAL); + Tcl_EvalEx(interp, findproc, TCL_INDEX_NONE, TCL_EVAL_GLOBAL); if (!Tcl_IsSafe(interp)) { Tcl_LinkVar(interp, "::tcl::zipfs::wrmax", (char *) &ZipFS.wrmax, TCL_LINK_INT); @@ -5676,8 +5676,8 @@ TclZipfs_Init( */ Tcl_GetEnsembleMappingDict(NULL, ensemble, &mapObj); - Tcl_DictObjPut(NULL, mapObj, Tcl_NewStringObj("find", -1), - Tcl_NewStringObj("::tcl::zipfs::find", -1)); + Tcl_DictObjPut(NULL, mapObj, Tcl_NewStringObj("find", TCL_INDEX_NONE), + Tcl_NewStringObj("::tcl::zipfs::find", TCL_INDEX_NONE)); Tcl_CreateObjCommand(interp, "::tcl::zipfs::tcl_library_init", ZipFSTclLibraryObjCmd, NULL, NULL); Tcl_PkgProvide(interp, "tcl::zipfs", "2.0"); @@ -5859,7 +5859,7 @@ TclZipfs_AppHook( Tcl_DString ds; Tcl_DStringInit(&ds); - archive = Tcl_WCharToUtfDString((*argvPtr)[1], -1, &ds); + archive = Tcl_WCharToUtfDString((*argvPtr)[1], TCL_INDEX_NONE, &ds); #else /* !_WIN32 */ archive = (*argvPtr)[1]; #endif /* _WIN32 */ diff --git a/generic/tclZlib.c b/generic/tclZlib.c index 718feb7..79aa9cb 100644 --- a/generic/tclZlib.c +++ b/generic/tclZlib.c @@ -313,7 +313,7 @@ ConvertError( sprintf(codeStrBuf, "%d", code); break; } - Tcl_SetObjResult(interp, Tcl_NewStringObj(zError(code), -1)); + Tcl_SetObjResult(interp, Tcl_NewStringObj(zError(code), TCL_INDEX_NONE)); /* * Tricky point! We might pass NULL twice here (and will when the error @@ -350,7 +350,7 @@ ConvertErrorToList( return Tcl_NewListObj(3, objv); case Z_ERRNO: TclNewLiteralStringObj(objv[2], "POSIX"); - objv[3] = Tcl_NewStringObj(Tcl_ErrnoId(), -1); + objv[3] = Tcl_NewStringObj(Tcl_ErrnoId(), TCL_INDEX_NONE); return Tcl_NewListObj(4, objv); case Z_NEED_DICT: TclNewLiteralStringObj(objv[2], "NEED_DICT"); @@ -405,7 +405,7 @@ GetValue( const char *nameStr, Tcl_Obj **valuePtrPtr) { - Tcl_Obj *name = Tcl_NewStringObj(nameStr, -1); + Tcl_Obj *name = Tcl_NewStringObj(nameStr, TCL_INDEX_NONE); int result = Tcl_DictObjGet(interp, dictObj, name, valuePtrPtr); TclDecrRefCount(name); @@ -557,7 +557,7 @@ GenerateHeader( */ #define SetValue(dictObj, key, value) \ - Tcl_DictObjPut(NULL, (dictObj), Tcl_NewStringObj((key), -1), (value)) + Tcl_DictObjPut(NULL, (dictObj), Tcl_NewStringObj((key), TCL_INDEX_NONE), (value)) static void ExtractHeader( @@ -579,7 +579,7 @@ ExtractHeader( } } - (void)Tcl_ExternalToUtfDString(latin1enc, (char *) headerPtr->comment, -1, + (void)Tcl_ExternalToUtfDString(latin1enc, (char *) headerPtr->comment, TCL_INDEX_NONE, &tmp); SetValue(dictObj, "comment", Tcl_DStringToObj(&tmp)); } @@ -596,7 +596,7 @@ ExtractHeader( } } - (void)Tcl_ExternalToUtfDString(latin1enc, (char *) headerPtr->name, -1, + (void)Tcl_ExternalToUtfDString(latin1enc, (char *) headerPtr->name, TCL_INDEX_NONE, &tmp); SetValue(dictObj, "filename", Tcl_DStringToObj(&tmp)); } @@ -608,7 +608,7 @@ ExtractHeader( } if (headerPtr->text != Z_UNKNOWN) { SetValue(dictObj, "type", - Tcl_NewStringObj(headerPtr->text ? "text" : "binary", -1)); + Tcl_NewStringObj(headerPtr->text ? "text" : "binary", TCL_INDEX_NONE)); } if (latin1enc != NULL) { @@ -842,7 +842,7 @@ Tcl_ZlibStreamInit( */ if (interp != NULL) { - if (Tcl_EvalEx(interp, "::incr ::tcl::zlib::cmdcounter", -1, 0) != TCL_OK) { + if (Tcl_EvalEx(interp, "::incr ::tcl::zlib::cmdcounter", TCL_INDEX_NONE, 0) != TCL_OK) { goto error; } Tcl_DStringInit(&cmdname); @@ -851,7 +851,7 @@ Tcl_ZlibStreamInit( if (Tcl_FindCommand(interp, Tcl_DStringValue(&cmdname), NULL, 0) != NULL) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "BUG: Stream command name already exists", -1)); + "BUG: Stream command name already exists", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "BUG", "EXISTING_CMD", NULL); Tcl_DStringFree(&cmdname); goto error; @@ -1242,7 +1242,7 @@ Tcl_ZlibStreamPut( if (zshPtr->streamEnd) { if (zshPtr->interp) { Tcl_SetObjResult(zshPtr->interp, Tcl_NewStringObj( - "already past compressed stream end", -1)); + "already past compressed stream end", TCL_INDEX_NONE)); Tcl_SetErrorCode(zshPtr->interp, "TCL", "ZIP", "CLOSED", NULL); } return TCL_ERROR; @@ -1473,7 +1473,7 @@ Tcl_ZlibStreamGet( if (zshPtr->interp) { Tcl_SetObjResult(zshPtr->interp, Tcl_NewStringObj( "unexpected zlib internal state during" - " decompression", -1)); + " decompression", TCL_INDEX_NONE)); Tcl_SetErrorCode(zshPtr->interp, "TCL", "ZIP", "STATE", NULL); } @@ -2238,7 +2238,7 @@ ZlibCmd( return TCL_ERROR; badLevel: - Tcl_SetObjResult(interp, Tcl_NewStringObj("level must be 0 to 9", -1)); + Tcl_SetObjResult(interp, Tcl_NewStringObj("level must be 0 to 9", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "VALUE", "COMPRESSIONLEVEL", NULL); if (extraInfoStr) { Tcl_AddErrorInfo(interp, extraInfoStr); @@ -2501,13 +2501,13 @@ ZlibPushSubcmd( if (mode == TCL_ZLIB_STREAM_DEFLATE && !(chanMode & TCL_WRITABLE)) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "compression may only be applied to writable channels", -1)); + "compression may only be applied to writable channels", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "ZIP", "UNWRITABLE", NULL); return TCL_ERROR; } if (mode == TCL_ZLIB_STREAM_INFLATE && !(chanMode & TCL_READABLE)) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "decompression may only be applied to readable channels",-1)); + "decompression may only be applied to readable channels",TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "ZIP", "UNREADABLE", NULL); return TCL_ERROR; } @@ -2541,7 +2541,7 @@ ZlibPushSubcmd( } if (level < 0 || level > 9) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "level must be 0 to 9", -1)); + "level must be 0 to 9", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "VALUE", "COMPRESSIONLEVEL", NULL); goto genericOptionError; @@ -2563,7 +2563,7 @@ ZlibPushSubcmd( if (format == TCL_ZLIB_FORMAT_GZIP) { Tcl_SetObjResult(interp, Tcl_NewStringObj( "a compression dictionary may not be set in the " - "gzip format", -1)); + "gzip format", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "ZIP", "BADOPT", NULL); goto genericOptionError; } @@ -2775,7 +2775,7 @@ ZlibStreamAddCmd( if (i == objc-2) { Tcl_SetObjResult(interp, Tcl_NewStringObj( "\"-buffer\" option must be followed by integer " - "decompression buffersize", -1)); + "decompression buffersize", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "ZIP", "NOVAL", NULL); return TCL_ERROR; } @@ -2794,7 +2794,7 @@ ZlibStreamAddCmd( if (i == objc-2) { Tcl_SetObjResult(interp, Tcl_NewStringObj( "\"-dictionary\" option must be followed by" - " compression dictionary bytes", -1)); + " compression dictionary bytes", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "ZIP", "NOVAL", NULL); return TCL_ERROR; } @@ -2805,7 +2805,7 @@ ZlibStreamAddCmd( if (flush == -2) { Tcl_SetObjResult(interp, Tcl_NewStringObj( "\"-flush\", \"-fullflush\" and \"-finalize\" options" - " are mutually exclusive", -1)); + " are mutually exclusive", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "ZIP", "EXCLUSIVE", NULL); return TCL_ERROR; } @@ -2902,7 +2902,7 @@ ZlibStreamPutCmd( if (i == objc-2) { Tcl_SetObjResult(interp, Tcl_NewStringObj( "\"-dictionary\" option must be followed by" - " compression dictionary bytes", -1)); + " compression dictionary bytes", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "ZIP", "NOVAL", NULL); return TCL_ERROR; } @@ -2912,7 +2912,7 @@ ZlibStreamPutCmd( if (flush == -2) { Tcl_SetObjResult(interp, Tcl_NewStringObj( "\"-flush\", \"-fullflush\" and \"-finalize\" options" - " are mutually exclusive", -1)); + " are mutually exclusive", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "ZIP", "EXCLUSIVE", NULL); return TCL_ERROR; } @@ -2960,7 +2960,7 @@ ZlibStreamHeaderCmd( } else if (zshPtr->mode != TCL_ZLIB_STREAM_INFLATE || zshPtr->format != TCL_ZLIB_FORMAT_GZIP) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "only gunzip streams can produce header information", -1)); + "only gunzip streams can produce header information", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "ZIP", "BADOP", NULL); return TCL_ERROR; } @@ -3274,7 +3274,7 @@ ZlibTransformOutput( Tcl_ListObjAppendElement(NULL, errObj, ConvertErrorToList(e, cd->outStream.adler)); Tcl_ListObjAppendElement(NULL, errObj, - Tcl_NewStringObj(cd->outStream.msg, -1)); + Tcl_NewStringObj(cd->outStream.msg, TCL_INDEX_NONE)); Tcl_SetChannelError(cd->parent, errObj); *errorCodePtr = EINVAL; return -1; @@ -3424,7 +3424,7 @@ ZlibTransformSetOption( /* not used */ return TCL_ERROR; } else if (newLimit < 1 || newLimit > MAX_BUFFER_SIZE) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "-limit must be between 1 and 65536", -1)); + "-limit must be between 1 and 65536", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "VALUE", "READLIMIT", NULL); return TCL_ERROR; } @@ -3498,7 +3498,7 @@ ZlibTransformGetOption( Tcl_DStringAppendElement(dsPtr, "-checksum"); Tcl_DStringAppendElement(dsPtr, buf); } else { - Tcl_DStringAppend(dsPtr, buf, -1); + Tcl_DStringAppend(dsPtr, buf, TCL_INDEX_NONE); return TCL_OK; } } @@ -3824,7 +3824,7 @@ ZlibStackChannelTransform( } cd->chan = chan; cd->parent = Tcl_GetStackedChannel(chan); - Tcl_SetObjResult(interp, Tcl_NewStringObj(Tcl_GetChannelName(chan), -1)); + Tcl_SetObjResult(interp, Tcl_NewStringObj(Tcl_GetChannelName(chan), TCL_INDEX_NONE)); return chan; error: @@ -3954,7 +3954,7 @@ ResultDecompress( Tcl_ListObjAppendElement(NULL, errObj, ConvertErrorToList(e, cd->inStream.adler)); Tcl_ListObjAppendElement(NULL, errObj, - Tcl_NewStringObj(cd->inStream.msg, -1)); + Tcl_NewStringObj(cd->inStream.msg, TCL_INDEX_NONE)); Tcl_SetChannelError(cd->parent, errObj); *errorCodePtr = EINVAL; return -1; @@ -3978,7 +3978,7 @@ TclZlibInit( * commands. */ - Tcl_EvalEx(interp, "namespace eval ::tcl::zlib {variable cmdcounter 0}", -1, 0); + Tcl_EvalEx(interp, "namespace eval ::tcl::zlib {variable cmdcounter 0}", TCL_INDEX_NONE, 0); /* * Create the public scripted interface to this file's functionality. @@ -4029,7 +4029,7 @@ Tcl_ZlibStreamInit( Tcl_ZlibStream *zshandle) { if (interp) { - Tcl_SetObjResult(interp, Tcl_NewStringObj("unimplemented", -1)); + Tcl_SetObjResult(interp, Tcl_NewStringObj("unimplemented", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "UNIMPLEMENTED", NULL); } return TCL_ERROR; @@ -4097,7 +4097,7 @@ Tcl_ZlibDeflate( Tcl_Obj *gzipHeaderDictObj) { if (interp) { - Tcl_SetObjResult(interp, Tcl_NewStringObj("unimplemented", -1)); + Tcl_SetObjResult(interp, Tcl_NewStringObj("unimplemented", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "UNIMPLEMENTED", NULL); } return TCL_ERROR; @@ -4112,7 +4112,7 @@ Tcl_ZlibInflate( Tcl_Obj *gzipHeaderDictObj) { if (interp) { - Tcl_SetObjResult(interp, Tcl_NewStringObj("unimplemented", -1)); + Tcl_SetObjResult(interp, Tcl_NewStringObj("unimplemented", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "UNIMPLEMENTED", NULL); } return TCL_ERROR; diff --git a/macosx/tclMacOSXFCmd.c b/macosx/tclMacOSXFCmd.c index 7bdc72a..7fc085c 100644 --- a/macosx/tclMacOSXFCmd.c +++ b/macosx/tclMacOSXFCmd.c @@ -203,7 +203,7 @@ TclMacOSXGetFileAttribute( return TCL_OK; #else Tcl_SetObjResult(interp, Tcl_NewStringObj( - "Mac OS X file attributes not supported", -1)); + "Mac OS X file attributes not supported", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "UNSUPPORTED", NULL); return TCL_ERROR; #endif /* HAVE_GETATTRLIST */ @@ -335,7 +335,7 @@ TclMacOSXSetFileAttribute( if (newRsrcForkSize != 0) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "setting nonzero rsrclength not supported", -1)); + "setting nonzero rsrclength not supported", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "UNSUPPORTED", NULL); return TCL_ERROR; } @@ -376,7 +376,7 @@ TclMacOSXSetFileAttribute( return TCL_OK; #else Tcl_SetObjResult(interp, Tcl_NewStringObj( - "Mac OS X file attributes not supported", -1)); + "Mac OS X file attributes not supported", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "UNSUPPORTED", NULL); return TCL_ERROR; #endif -- cgit v0.12 From 76edd58c3e121255d2dae1c5bc1b2fc86d1ab3fc Mon Sep 17 00:00:00 2001 From: pooryorick Date: Sun, 5 Mar 2023 07:11:15 +0000 Subject: A better fix for Valgrind "still reachable" report in TestcmdtokenCmd(). --- generic/tclTest.c | 56 ++++++++++++++++++++++++++++++++++++++---------------- tests/basic.test | 6 +++--- tests/cmdInfo.test | 6 +++--- 3 files changed, 46 insertions(+), 22 deletions(-) diff --git a/generic/tclTest.c b/generic/tclTest.c index fbd4774..5b57157 100644 --- a/generic/tclTest.c +++ b/generic/tclTest.c @@ -70,6 +70,7 @@ static Tcl_Interp *delInterp; typedef struct TestCommandTokenRef { int id; /* Identifier for this reference. */ Tcl_Command token; /* Tcl's token for the command. */ + const char *value; struct TestCommandTokenRef *nextPtr; /* Next in list of references. */ } TestCommandTokenRef; @@ -1179,6 +1180,18 @@ TestcmdinfoCmd( } static int +CmdProc0( + void *clientData, /* String to return. */ + Tcl_Interp *interp, /* Current interpreter. */ + TCL_UNUSED(int) /*argc*/, + TCL_UNUSED(const char **) /*argv*/) +{ + TestCommandTokenRef *refPtr = (TestCommandTokenRef *) clientData; + Tcl_AppendResult(interp, "CmdProc1 ", refPtr->value, NULL); + return TCL_OK; +} + +static int CmdProc1( void *clientData, /* String to return. */ Tcl_Interp *interp, /* Current interpreter. */ @@ -1189,6 +1202,7 @@ CmdProc1( return TCL_OK; } + static int CmdProc2( void *clientData, /* String to return. */ @@ -1201,6 +1215,28 @@ CmdProc2( } static void +CmdDelProc0( + void *clientData) /* String to save. */ +{ + TestCommandTokenRef *thisRefPtr, *prevRefPtr = NULL; + TestCommandTokenRef *refPtr = (TestCommandTokenRef *) clientData; + int id = refPtr->id; + for (thisRefPtr = firstCommandTokenRef; refPtr != NULL; + thisRefPtr = thisRefPtr->nextPtr) { + if (thisRefPtr->id == id) { + if (prevRefPtr != NULL) { + prevRefPtr->nextPtr = thisRefPtr->nextPtr; + } else { + firstCommandTokenRef = thisRefPtr->nextPtr; + } + break; + } + prevRefPtr = thisRefPtr; + } + ckfree(refPtr); +} + +static void CmdDelProc1( void *clientData) /* String to save. */ { @@ -1242,7 +1278,7 @@ TestcmdtokenCmd( int argc, /* Number of arguments. */ const char **argv) /* Argument strings. */ { - TestCommandTokenRef *refPtr, *prevRefPtr; + TestCommandTokenRef *refPtr; char buf[30]; int id; @@ -1253,9 +1289,10 @@ TestcmdtokenCmd( } if (strcmp(argv[1], "create") == 0) { refPtr = (TestCommandTokenRef *)Tcl_Alloc(sizeof(TestCommandTokenRef)); - refPtr->token = Tcl_CreateCommand(interp, argv[2], CmdProc1, - (void *) "original", NULL); + refPtr->token = Tcl_CreateCommand(interp, argv[2], CmdProc0, + refPtr, CmdDelProc0); refPtr->id = nextCommandTokenRefId; + refPtr->value = "original"; nextCommandTokenRefId++; refPtr->nextPtr = firstCommandTokenRef; firstCommandTokenRef = refPtr; @@ -1291,19 +1328,6 @@ TestcmdtokenCmd( Tcl_GetCommandName(interp, refPtr->token)); Tcl_AppendElement(interp, Tcl_GetString(objPtr)); Tcl_DecrRefCount(objPtr); - } else if (strcmp(argv[1], "free") == 0) { - prevRefPtr = NULL; - for (refPtr = firstCommandTokenRef; refPtr != NULL; - refPtr = refPtr->nextPtr) { - if (refPtr->id == id) { - if (prevRefPtr != NULL) { - prevRefPtr->nextPtr = refPtr->nextPtr; - } - ckfree(refPtr); - break; - } - prevRefPtr = refPtr; - } } else { Tcl_AppendResult(interp, "bad option \"", argv[1], "\": must be create, name, or free", NULL); diff --git a/tests/basic.test b/tests/basic.test index de986c7..c90d80e 100644 --- a/tests/basic.test +++ b/tests/basic.test @@ -336,19 +336,19 @@ test basic-20.1 {Tcl_GetCommandInfo, names for commands created inside namespace }] list [testcmdtoken name $x] \ [rename ::p q] \ - [testcmdtoken name $x][testcmdtoken free $x] + [testcmdtoken name $x] } {{p ::p} {} {q ::q}} test basic-20.2 {Tcl_GetCommandInfo, names for commands created outside namespaces} {testcmdtoken} { catch {rename q ""} set x [testcmdtoken create test_ns_basic::test_ns_basic2::p] list [testcmdtoken name $x] \ [rename test_ns_basic::test_ns_basic2::p q] \ - [testcmdtoken name $x][testcmdtoken free $x] + [testcmdtoken name $x] } {{p ::test_ns_basic::test_ns_basic2::p} {} {q ::q}} test basic-20.3 {Tcl_GetCommandInfo, #-quoting} testcmdtoken { catch {rename \# ""} set x [testcmdtoken create \#] - return [testcmdtoken name $x][testcmdtoken free $x] + return [testcmdtoken name $x] } {{#} ::#} test basic-21.1 {Tcl_GetCommandName} {emptyTest} { diff --git a/tests/cmdInfo.test b/tests/cmdInfo.test index ad564d7..37b8a0b 100644 --- a/tests/cmdInfo.test +++ b/tests/cmdInfo.test @@ -70,7 +70,7 @@ test cmdinfo-4.1 {Tcl_GetCommandName/Tcl_GetCommandFullName procedures} \ rename x1 newName set y [testcmdtoken name $x] rename newName x1 - lappend y {*}[testcmdtoken name $x][testcmdtoken free $x] + lappend y {*}[testcmdtoken name $x] } {newName ::newName x1 ::x1} catch {rename newTestCmd {}} @@ -87,7 +87,7 @@ test cmdinfo-5.1 {Names for commands created when inside namespaces} \ }] set y [testcmdtoken name $x] rename ::testCmd newTestCmd - lappend y {*}[testcmdtoken name $x][testcmdtoken free $x] + lappend y {*}[testcmdtoken name $x] } {testCmd ::testCmd newTestCmd ::newTestCmd} test cmdinfo-6.1 {Names for commands created when outside namespaces} \ @@ -95,7 +95,7 @@ test cmdinfo-6.1 {Names for commands created when outside namespaces} \ set x [testcmdtoken create cmdInfoNs1::cmdInfoNs2::testCmd] set y [testcmdtoken name $x] rename cmdInfoNs1::cmdInfoNs2::testCmd newTestCmd2 - lappend y {*}[testcmdtoken name $x][testcmdtoken free $x] + lappend y {*}[testcmdtoken name $x] } {testCmd ::cmdInfoNs1::cmdInfoNs2::testCmd newTestCmd2 ::newTestCmd2} # cleanup -- cgit v0.12 From e4e106233842d77095bf459f14bb82e953bc8c6f Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Sun, 5 Mar 2023 19:57:48 +0000 Subject: Another round of -1 -> TCL_INDEX_NONE --- generic/tclAssembly.c | 30 ++++++------ generic/tclBasic.c | 72 ++++++++++++++-------------- generic/tclBinary.c | 12 ++--- generic/tclCmdIL.c | 72 ++++++++++++++-------------- generic/tclCmdMZ.c | 30 ++++++------ generic/tclCompExpr.c | 8 ++-- generic/tclDisassemble.c | 108 +++++++++++++++++++++--------------------- generic/tclEnsemble.c | 72 ++++++++++++++-------------- generic/tclEvent.c | 30 ++++++------ generic/tclExecute.c | 38 +++++++-------- generic/tclFileName.c | 28 +++++------ generic/tclIOGT.c | 52 ++++++++++----------- generic/tclIOSock.c | 2 +- generic/tclIOUtil.c | 10 ++-- generic/tclIndexObj.c | 12 ++--- generic/tclInterp.c | 116 +++++++++++++++++++++++----------------------- generic/tclNamesp.c | 58 +++++++++++------------ generic/tclOO.c | 106 +++++++++++++++++++++--------------------- generic/tclOOBasic.c | 44 +++++++++--------- generic/tclOODefineCmds.c | 112 ++++++++++++++++++++++---------------------- generic/tclOOInfo.c | 36 +++++++------- generic/tclObj.c | 22 ++++----- generic/tclParse.c | 22 ++++----- generic/tclPipe.c | 14 +++--- generic/tclProcess.c | 32 ++++++------- generic/tclScan.c | 22 ++++----- generic/tclVar.c | 36 +++++++------- unix/dltest/pkgb.c | 2 +- unix/dltest/pkgc.c | 2 +- unix/dltest/pkgd.c | 2 +- unix/dltest/pkge.c | 2 +- unix/tclUnixFCmd.c | 2 +- 32 files changed, 603 insertions(+), 603 deletions(-) diff --git a/generic/tclAssembly.c b/generic/tclAssembly.c index a05a4d4..af95312 100644 --- a/generic/tclAssembly.c +++ b/generic/tclAssembly.c @@ -1384,7 +1384,7 @@ AssembleOneLine( } if (opnd < 0 || opnd > 3) { Tcl_SetObjResult(interp, - Tcl_NewStringObj("operand must be [0..3]", -1)); + Tcl_NewStringObj("operand must be [0..3]", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "ASSEM", "OPERAND<0,>3", NULL); goto cleanup; } @@ -1625,7 +1625,7 @@ AssembleOneLine( if (opnd < 2) { if (assemEnvPtr->flags & TCL_EVAL_DIRECT) { Tcl_SetObjResult(interp, - Tcl_NewStringObj("operand must be >=2", -1)); + Tcl_NewStringObj("operand must be >=2", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "ASSEM", "OPERAND>=2", NULL); } goto cleanup; @@ -2107,7 +2107,7 @@ GetNextOperand( Tcl_DecrRefCount(operandObj); if (assemEnvPtr->flags & TCL_EVAL_DIRECT) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "assembly code may not contain substitutions", -1)); + "assembly code may not contain substitutions", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "ASSEM", "NOSUBST", NULL); } return TCL_ERROR; @@ -2330,7 +2330,7 @@ FindLocalVar( if (assemEnvPtr->flags & TCL_EVAL_DIRECT) { Tcl_SetObjResult(interp, Tcl_NewStringObj( "cannot use this instruction to create a variable" - " in a non-proc context", -1)); + " in a non-proc context", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "ASSEM", "LVT", NULL); } return TCL_INDEX_NONE; @@ -2400,7 +2400,7 @@ CheckOneByte( Tcl_Obj* result; /* Error message */ if (value < 0 || value > 0xFF) { - result = Tcl_NewStringObj("operand does not fit in one byte", -1); + result = Tcl_NewStringObj("operand does not fit in one byte", TCL_INDEX_NONE); Tcl_SetObjResult(interp, result); Tcl_SetErrorCode(interp, "TCL", "ASSEM", "1BYTE", NULL); return TCL_ERROR; @@ -2435,7 +2435,7 @@ CheckSignedOneByte( Tcl_Obj* result; /* Error message */ if (value > 0x7F || value < -0x80) { - result = Tcl_NewStringObj("operand does not fit in one byte", -1); + result = Tcl_NewStringObj("operand does not fit in one byte", TCL_INDEX_NONE); Tcl_SetObjResult(interp, result); Tcl_SetErrorCode(interp, "TCL", "ASSEM", "1BYTE", NULL); return TCL_ERROR; @@ -2468,7 +2468,7 @@ CheckNonNegative( Tcl_Obj* result; /* Error message */ if (value < 0) { - result = Tcl_NewStringObj("operand must be nonnegative", -1); + result = Tcl_NewStringObj("operand must be nonnegative", TCL_INDEX_NONE); Tcl_SetObjResult(interp, result); Tcl_SetErrorCode(interp, "TCL", "ASSEM", "NONNEGATIVE", NULL); return TCL_ERROR; @@ -2501,7 +2501,7 @@ CheckStrictlyPositive( Tcl_Obj* result; /* Error message */ if (value <= 0) { - result = Tcl_NewStringObj("operand must be positive", -1); + result = Tcl_NewStringObj("operand must be positive", TCL_INDEX_NONE); Tcl_SetObjResult(interp, result); Tcl_SetErrorCode(interp, "TCL", "ASSEM", "POSITIVE", NULL); return TCL_ERROR; @@ -3414,7 +3414,7 @@ StackCheckBasicBlock( } if (assemEnvPtr->flags & TCL_EVAL_DIRECT) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "inconsistent stack depths on two execution paths", -1)); + "inconsistent stack depths on two execution paths", TCL_INDEX_NONE)); /* * TODO - add execution trace of both paths @@ -3443,7 +3443,7 @@ StackCheckBasicBlock( if (initialStackDepth + blockPtr->minStackDepth < 0) { if (assemEnvPtr->flags & TCL_EVAL_DIRECT) { - Tcl_SetObjResult(interp, Tcl_NewStringObj("stack underflow", -1)); + Tcl_SetObjResult(interp, Tcl_NewStringObj("stack underflow", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "ASSEM", "BADSTACK", NULL); AddBasicBlockRangeToErrorInfo(assemEnvPtr, blockPtr); Tcl_SetErrorLine(interp, blockPtr->startLine); @@ -3462,8 +3462,8 @@ StackCheckBasicBlock( + blockPtr->enclosingCatch->finalStackDepth)) { if (assemEnvPtr->flags & TCL_EVAL_DIRECT) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "code pops stack below level of enclosing catch", -1)); - Tcl_SetErrorCode(interp, "TCL", "ASSEM", "BADSTACKINCATCH", -1); + "code pops stack below level of enclosing catch", TCL_INDEX_NONE)); + Tcl_SetErrorCode(interp, "TCL", "ASSEM", "BADSTACKINCATCH", TCL_INDEX_NONE); AddBasicBlockRangeToErrorInfo(assemEnvPtr, blockPtr); Tcl_SetErrorLine(interp, blockPtr->startLine); } @@ -3734,7 +3734,7 @@ ProcessCatchesInBasicBlock( if (assemEnvPtr->flags & TCL_EVAL_DIRECT) { Tcl_SetObjResult(interp, Tcl_NewStringObj( "execution reaches an instruction in inconsistent " - "exception contexts", -1)); + "exception contexts", TCL_INDEX_NONE)); Tcl_SetErrorLine(interp, bbPtr->startLine); Tcl_SetErrorCode(interp, "TCL", "ASSEM", "BADCATCH", NULL); } @@ -3793,7 +3793,7 @@ ProcessCatchesInBasicBlock( if (enclosing == NULL) { if (assemEnvPtr->flags & TCL_EVAL_DIRECT) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "endCatch without a corresponding beginCatch", -1)); + "endCatch without a corresponding beginCatch", TCL_INDEX_NONE)); Tcl_SetErrorLine(interp, bbPtr->startLine); Tcl_SetErrorCode(interp, "TCL", "ASSEM", "BADENDCATCH", NULL); } @@ -3868,7 +3868,7 @@ CheckForUnclosedCatches( if (assemEnvPtr->curr_bb->catchState >= BBCS_INCATCH) { if (assemEnvPtr->flags & TCL_EVAL_DIRECT) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "catch still active on exit from assembly code", -1)); + "catch still active on exit from assembly code", TCL_INDEX_NONE)); Tcl_SetErrorLine(interp, assemEnvPtr->curr_bb->enclosingCatch->startLine); Tcl_SetErrorCode(interp, "TCL", "ASSEM", "UNCLOSEDCATCH", NULL); diff --git a/generic/tclBasic.c b/generic/tclBasic.c index 1dbd90b..381d127 100644 --- a/generic/tclBasic.c +++ b/generic/tclBasic.c @@ -2152,7 +2152,7 @@ Tcl_HideCommand( if (strstr(hiddenCmdToken, "::") != NULL) { Tcl_SetObjResult(interp, Tcl_NewStringObj( "cannot use namespace qualifiers in hidden command" - " token (rename)", -1)); + " token (rename)", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "VALUE", "HIDDENTOKEN", NULL); return TCL_ERROR; } @@ -3188,11 +3188,11 @@ TclRenameCommand( */ Tcl_DStringInit(&newFullName); - Tcl_DStringAppend(&newFullName, newNsPtr->fullName, -1); + Tcl_DStringAppend(&newFullName, newNsPtr->fullName, TCL_INDEX_NONE); if (newNsPtr != iPtr->globalNsPtr) { TclDStringAppendLiteral(&newFullName, "::"); } - Tcl_DStringAppend(&newFullName, newTail, -1); + Tcl_DStringAppend(&newFullName, newTail, TCL_INDEX_NONE); cmdPtr->refCount++; CallCommandTraces(iPtr, cmdPtr, TclGetString(oldFullName), Tcl_DStringValue(&newFullName), TCL_TRACE_RENAME); @@ -3553,14 +3553,14 @@ Tcl_GetCommandFullName( if ((cmdPtr != NULL) && TclRoutineHasName(cmdPtr)) { if (cmdPtr->nsPtr != NULL) { - Tcl_AppendToObj(objPtr, cmdPtr->nsPtr->fullName, -1); + Tcl_AppendToObj(objPtr, cmdPtr->nsPtr->fullName, TCL_INDEX_NONE); if (cmdPtr->nsPtr != iPtr->globalNsPtr) { Tcl_AppendToObj(objPtr, "::", 2); } } if (cmdPtr->hPtr != NULL) { name = (char *)Tcl_GetHashKey(cmdPtr->hPtr->tablePtr, cmdPtr->hPtr); - Tcl_AppendToObj(objPtr, name, -1); + Tcl_AppendToObj(objPtr, name, TCL_INDEX_NONE); } } } @@ -4061,7 +4061,7 @@ TclInterpReady( if (iPtr->flags & DELETED) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "attempt to call eval in deleted interpreter", -1)); + "attempt to call eval in deleted interpreter", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "IDELETE", "attempt to call eval in deleted interpreter", NULL); return TCL_ERROR; @@ -4090,7 +4090,7 @@ TclInterpReady( } Tcl_SetObjResult(interp, Tcl_NewStringObj( - "too many nested evaluations (infinite loop?)", -1)); + "too many nested evaluations (infinite loop?)", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "LIMIT", "STACK", NULL); return TCL_ERROR; } @@ -4224,7 +4224,7 @@ Tcl_Canceled( } } - Tcl_SetObjResult(interp, Tcl_NewStringObj(message, -1)); + Tcl_SetObjResult(interp, Tcl_NewStringObj(message, TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "CANCEL", id, message, NULL); } @@ -6361,10 +6361,10 @@ ProcessUnexpectedResult( Tcl_ResetResult(interp); if (returnCode == TCL_BREAK) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "invoked \"break\" outside of a loop", -1)); + "invoked \"break\" outside of a loop", TCL_INDEX_NONE)); } else if (returnCode == TCL_CONTINUE) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "invoked \"continue\" outside of a loop", -1)); + "invoked \"continue\" outside of a loop", TCL_INDEX_NONE)); } else { Tcl_SetObjResult(interp, Tcl_ObjPrintf( "command returned bad code: %d", returnCode)); @@ -6410,7 +6410,7 @@ Tcl_ExprLong( *ptr = 0; } else { - exprPtr = Tcl_NewStringObj(exprstring, -1); + exprPtr = Tcl_NewStringObj(exprstring, TCL_INDEX_NONE); Tcl_IncrRefCount(exprPtr); result = Tcl_ExprLongObj(interp, exprPtr, ptr); Tcl_DecrRefCount(exprPtr); @@ -6435,7 +6435,7 @@ Tcl_ExprDouble( *ptr = 0.0; } else { - exprPtr = Tcl_NewStringObj(exprstring, -1); + exprPtr = Tcl_NewStringObj(exprstring, TCL_INDEX_NONE); Tcl_IncrRefCount(exprPtr); result = Tcl_ExprDoubleObj(interp, exprPtr, ptr); Tcl_DecrRefCount(exprPtr); @@ -6460,7 +6460,7 @@ Tcl_ExprBoolean( return TCL_OK; } else { int result; - Tcl_Obj *exprPtr = Tcl_NewStringObj(exprstring, -1); + Tcl_Obj *exprPtr = Tcl_NewStringObj(exprstring, TCL_INDEX_NONE); Tcl_IncrRefCount(exprPtr); result = Tcl_ExprBooleanObj(interp, exprPtr, ptr); @@ -6673,7 +6673,7 @@ TclObjInvoke( } if ((objc < 1) || (objv == NULL)) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "illegal argument vector", -1)); + "illegal argument vector", TCL_INDEX_NONE)); return TCL_ERROR; } if ((flags & TCL_INVOKE_HIDDEN) == 0) { @@ -6772,7 +6772,7 @@ Tcl_ExprString( Tcl_SetObjResult(interp, Tcl_NewWideIntObj(0)); } else { - Tcl_Obj *resultPtr, *exprObj = Tcl_NewStringObj(expr, -1); + Tcl_Obj *resultPtr, *exprObj = Tcl_NewStringObj(expr, TCL_INDEX_NONE); Tcl_IncrRefCount(exprObj); code = Tcl_ExprObj(interp, exprObj, &resultPtr); @@ -6886,10 +6886,10 @@ Tcl_VarEval( if (string == NULL) { break; } - Tcl_DStringAppend(&buf, string, -1); + Tcl_DStringAppend(&buf, string, TCL_INDEX_NONE); } - result = Tcl_EvalEx(interp, Tcl_DStringValue(&buf), -1, 0); + result = Tcl_EvalEx(interp, Tcl_DStringValue(&buf), TCL_INDEX_NONE, 0); Tcl_DStringFree(&buf); return result; } @@ -7192,7 +7192,7 @@ ExprIsqrtFunc( negarg: Tcl_SetObjResult(interp, Tcl_NewStringObj( - "square root of negative argument", -1)); + "square root of negative argument", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "ARITH", "DOMAIN", "domain error: argument not in valid range", NULL); return TCL_ERROR; @@ -8806,7 +8806,7 @@ TclNRTailcallObjCmd( if (!(iPtr->varFramePtr->isProcCallFrame & 1)) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "tailcall can only be called from a proc, lambda or method", -1)); + "tailcall can only be called from a proc, lambda or method", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "TAILCALL", "ILLEGAL", NULL); return TCL_ERROR; } @@ -8836,7 +8836,7 @@ TclNRTailcallObjCmd( * namespace, the rest the command to be tailcalled. */ - nsObjPtr = Tcl_NewStringObj(nsPtr->fullName, -1); + nsObjPtr = Tcl_NewStringObj(nsPtr->fullName, TCL_INDEX_NONE); listPtr = Tcl_NewListObj(objc, objv); TclListObjSetElement(interp, listPtr, 0, nsObjPtr); @@ -8968,7 +8968,7 @@ TclNRYieldObjCmd( if (!corPtr) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "yield can only be called in a coroutine", -1)); + "yield can only be called in a coroutine", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "COROUTINE", "ILLEGAL_YIELD", NULL); return TCL_ERROR; } @@ -9001,14 +9001,14 @@ TclNRYieldToObjCmd( if (!corPtr) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "yieldto can only be called in a coroutine", -1)); + "yieldto can only be called in a coroutine", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "COROUTINE", "ILLEGAL_YIELD", NULL); return TCL_ERROR; } if (((Namespace *) nsPtr)->flags & NS_DYING) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "yieldto called in deleted namespace", -1)); + "yieldto called in deleted namespace", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "COROUTINE", "YIELDTO_IN_DELETED", NULL); return TCL_ERROR; @@ -9021,7 +9021,7 @@ TclNRYieldToObjCmd( */ listPtr = Tcl_NewListObj(objc, objv); - nsObjPtr = Tcl_NewStringObj(nsPtr->fullName, -1); + nsObjPtr = Tcl_NewStringObj(nsPtr->fullName, TCL_INDEX_NONE); TclListObjSetElement(interp, listPtr, 0, nsObjPtr); /* @@ -9243,7 +9243,7 @@ TclNRCoroutineActivateCallback( Tcl_SetObjResult(interp, Tcl_NewStringObj( - "cannot yield: C stack busy", -1)); + "cannot yield: C stack busy", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "COROUTINE", "CANT_YIELD", NULL); return TCL_ERROR; @@ -9332,7 +9332,7 @@ CoroTypeObjCmd( cmdPtr = (Command *) Tcl_GetCommandFromObj(interp, objv[1]); if ((!cmdPtr) || (cmdPtr->nreProc != TclNRInterpCoroutine)) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "can only get coroutine type of a coroutine", -1)); + "can only get coroutine type of a coroutine", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "LOOKUP", "COROUTINE", TclGetString(objv[1]), NULL); return TCL_ERROR; @@ -9345,7 +9345,7 @@ CoroTypeObjCmd( corPtr = (CoroutineData *)cmdPtr->objClientData; if (!COR_IS_SUSPENDED(corPtr)) { - Tcl_SetObjResult(interp, Tcl_NewStringObj("active", -1)); + Tcl_SetObjResult(interp, Tcl_NewStringObj("active", TCL_INDEX_NONE)); return TCL_OK; } @@ -9356,14 +9356,14 @@ CoroTypeObjCmd( switch (corPtr->nargs) { case COROUTINE_ARGUMENTS_SINGLE_OPTIONAL: - Tcl_SetObjResult(interp, Tcl_NewStringObj("yield", -1)); + Tcl_SetObjResult(interp, Tcl_NewStringObj("yield", TCL_INDEX_NONE)); return TCL_OK; case COROUTINE_ARGUMENTS_ARBITRARY: - Tcl_SetObjResult(interp, Tcl_NewStringObj("yieldto", -1)); + Tcl_SetObjResult(interp, Tcl_NewStringObj("yieldto", TCL_INDEX_NONE)); return TCL_OK; default: Tcl_SetObjResult(interp, Tcl_NewStringObj( - "unknown coroutine type", -1)); + "unknown coroutine type", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "COROUTINE", "BAD_TYPE", NULL); return TCL_ERROR; } @@ -9392,7 +9392,7 @@ GetCoroutineFromObj( Command *cmdPtr = (Command *) Tcl_GetCommandFromObj(interp, objPtr); if ((!cmdPtr) || (cmdPtr->nreProc != TclNRInterpCoroutine)) { - Tcl_SetObjResult(interp, Tcl_NewStringObj(errMsg, -1)); + Tcl_SetObjResult(interp, Tcl_NewStringObj(errMsg, TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "LOOKUP", "COROUTINE", TclGetString(objPtr), NULL); return NULL; @@ -9426,7 +9426,7 @@ TclNRCoroInjectObjCmd( } if (!COR_IS_SUSPENDED(corPtr)) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "can only inject a command into a suspended coroutine", -1)); + "can only inject a command into a suspended coroutine", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "COROUTINE", "ACTIVE", NULL); return TCL_ERROR; } @@ -9560,10 +9560,10 @@ InjectHandler( if (nargs == COROUTINE_ARGUMENTS_SINGLE_OPTIONAL) { Tcl_ListObjAppendElement(NULL, listPtr, - Tcl_NewStringObj("yield", -1)); + Tcl_NewStringObj("yield", TCL_INDEX_NONE)); } else if (nargs == COROUTINE_ARGUMENTS_ARBITRARY) { Tcl_ListObjAppendElement(NULL, listPtr, - Tcl_NewStringObj("yieldto", -1)); + Tcl_NewStringObj("yieldto", TCL_INDEX_NONE)); } else { /* * I don't think this is reachable... @@ -9662,7 +9662,7 @@ NRInjectObjCmd( } if (!COR_IS_SUSPENDED(corPtr)) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "can only inject a command into a suspended coroutine", -1)); + "can only inject a command into a suspended coroutine", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "COROUTINE", "ACTIVE", NULL); return TCL_ERROR; } @@ -9716,7 +9716,7 @@ TclNRInterpCoroutine( if (corPtr->nargs + 1 != (size_t)objc) { Tcl_SetObjResult(interp, Tcl_NewStringObj("wrong coro nargs; how did we get here? " - "not implemented!", -1)); + "not implemented!", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "WRONGARGS", NULL); return TCL_ERROR; } diff --git a/generic/tclBinary.c b/generic/tclBinary.c index e0d99c7..1083533 100644 --- a/generic/tclBinary.c +++ b/generic/tclBinary.c @@ -395,7 +395,7 @@ TclGetBytesFromObj( if (interp) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "byte sequence length exceeds INT_MAX", -1)); + "byte sequence length exceeds INT_MAX", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "API", "OUTDATED", NULL); } return NULL; @@ -1003,7 +1003,7 @@ BinaryFormatCmd( case 'x': if (count == BINARY_ALL) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "cannot use \"*\" in format string with \"x\"", -1)); + "cannot use \"*\" in format string with \"x\"", TCL_INDEX_NONE)); return TCL_ERROR; } else if (count == BINARY_NOCOUNT) { count = 1; @@ -1343,7 +1343,7 @@ BinaryFormatCmd( } error: - Tcl_SetObjResult(interp, Tcl_NewStringObj(errorString, -1)); + Tcl_SetObjResult(interp, Tcl_NewStringObj(errorString, TCL_INDEX_NONE)); return TCL_ERROR; } @@ -1724,7 +1724,7 @@ BinaryScanCmd( } error: - Tcl_SetObjResult(interp, Tcl_NewStringObj(errorString, -1)); + Tcl_SetObjResult(interp, Tcl_NewStringObj(errorString, TCL_INDEX_NONE)); return TCL_ERROR; } @@ -2654,7 +2654,7 @@ BinaryEncode64( } if (maxlen < 0) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "line length out of range", -1)); + "line length out of range", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "BINARY", "ENCODE", "LINE_LENGTH", NULL); return TCL_ERROR; @@ -2782,7 +2782,7 @@ BinaryEncodeUu( } if (lineLength < 5 || lineLength > 85) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "line length out of range", -1)); + "line length out of range", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "BINARY", "ENCODE", "LINE_LENGTH", NULL); return TCL_ERROR; diff --git a/generic/tclCmdIL.c b/generic/tclCmdIL.c index 69d4484..e1949a5 100644 --- a/generic/tclCmdIL.c +++ b/generic/tclCmdIL.c @@ -504,7 +504,7 @@ InfoArgsCmd( localPtr = localPtr->nextPtr) { if (TclIsVarArgument(localPtr)) { Tcl_ListObjAppendElement(interp, listObjPtr, - Tcl_NewStringObj(localPtr->name, -1)); + Tcl_NewStringObj(localPtr->name, TCL_INDEX_NONE)); } } Tcl_SetObjResult(interp, listObjPtr); @@ -716,7 +716,7 @@ InfoCommandsCmd( Tcl_GetCommandFullName(interp, cmd, elemObjPtr); } else { cmdName = (const char *)Tcl_GetHashKey(&nsPtr->cmdTable, entryPtr); - elemObjPtr = Tcl_NewStringObj(cmdName, -1); + elemObjPtr = Tcl_NewStringObj(cmdName, TCL_INDEX_NONE); } Tcl_ListObjAppendElement(interp, listPtr, elemObjPtr); Tcl_SetObjResult(interp, listPtr); @@ -744,7 +744,7 @@ InfoCommandsCmd( if (entryPtr != NULL) { cmdName = (const char *)Tcl_GetHashKey(tablePtr, entryPtr); Tcl_ListObjAppendElement(interp, listPtr, - Tcl_NewStringObj(cmdName, -1)); + Tcl_NewStringObj(cmdName, TCL_INDEX_NONE)); Tcl_SetObjResult(interp, listPtr); return TCL_OK; } @@ -766,7 +766,7 @@ InfoCommandsCmd( elemObjPtr = Tcl_NewObj(); Tcl_GetCommandFullName(interp, cmd, elemObjPtr); } else { - elemObjPtr = Tcl_NewStringObj(cmdName, -1); + elemObjPtr = Tcl_NewStringObj(cmdName, TCL_INDEX_NONE); } Tcl_ListObjAppendElement(interp, listPtr, elemObjPtr); } @@ -789,7 +789,7 @@ InfoCommandsCmd( || Tcl_StringMatch(cmdName, simplePattern)) { if (Tcl_FindHashEntry(&nsPtr->cmdTable,cmdName) == NULL) { Tcl_ListObjAppendElement(interp, listPtr, - Tcl_NewStringObj(cmdName, -1)); + Tcl_NewStringObj(cmdName, TCL_INDEX_NONE)); } } entryPtr = Tcl_NextHashEntry(&search); @@ -818,7 +818,7 @@ InfoCommandsCmd( cmdName = (const char *)Tcl_GetHashKey(&nsPtr->cmdTable, entryPtr); if ((simplePattern == NULL) || Tcl_StringMatch(cmdName, simplePattern)) { - elemObjPtr = Tcl_NewStringObj(cmdName, -1); + elemObjPtr = Tcl_NewStringObj(cmdName, TCL_INDEX_NONE); Tcl_ListObjAppendElement(interp, listPtr, elemObjPtr); (void) Tcl_CreateHashEntry(&addedCommandsTable, elemObjPtr, &isNew); @@ -844,7 +844,7 @@ InfoCommandsCmd( cmdName = (const char *)Tcl_GetHashKey(&pathNsPtr->cmdTable, entryPtr); if ((simplePattern == NULL) || Tcl_StringMatch(cmdName, simplePattern)) { - elemObjPtr = Tcl_NewStringObj(cmdName, -1); + elemObjPtr = Tcl_NewStringObj(cmdName, TCL_INDEX_NONE); (void) Tcl_CreateHashEntry(&addedCommandsTable, elemObjPtr, &isNew); if (isNew) { @@ -871,7 +871,7 @@ InfoCommandsCmd( cmdName = (const char *)Tcl_GetHashKey(&globalNsPtr->cmdTable, entryPtr); if ((simplePattern == NULL) || Tcl_StringMatch(cmdName, simplePattern)) { - elemObjPtr = Tcl_NewStringObj(cmdName, -1); + elemObjPtr = Tcl_NewStringObj(cmdName, TCL_INDEX_NONE); if (Tcl_FindHashEntry(&addedCommandsTable, (char *) elemObjPtr) == NULL) { Tcl_ListObjAppendElement(interp, listPtr, elemObjPtr); @@ -1291,7 +1291,7 @@ TclInfoFrame( * str. */ - ADD_PAIR("type", Tcl_NewStringObj(typeString[framePtr->type], -1)); + ADD_PAIR("type", Tcl_NewStringObj(typeString[framePtr->type], TCL_INDEX_NONE)); if (framePtr->line) { ADD_PAIR("line", Tcl_NewWideIntObj(framePtr->line[0])); } else { @@ -1305,7 +1305,7 @@ TclInfoFrame( * Precompiled. Result contains the type as signal, nothing else. */ - ADD_PAIR("type", Tcl_NewStringObj(typeString[framePtr->type], -1)); + ADD_PAIR("type", Tcl_NewStringObj(typeString[framePtr->type], TCL_INDEX_NONE)); break; case TCL_LOCATION_BC: { @@ -1330,7 +1330,7 @@ TclInfoFrame( * Possibly modified: type, path! */ - ADD_PAIR("type", Tcl_NewStringObj(typeString[fPtr->type], -1)); + ADD_PAIR("type", Tcl_NewStringObj(typeString[fPtr->type], TCL_INDEX_NONE)); if (fPtr->line) { ADD_PAIR("line", Tcl_NewWideIntObj(fPtr->line[0])); } @@ -1358,7 +1358,7 @@ TclInfoFrame( * Evaluation of a script file. */ - ADD_PAIR("type", Tcl_NewStringObj(typeString[framePtr->type], -1)); + ADD_PAIR("type", Tcl_NewStringObj(typeString[framePtr->type], TCL_INDEX_NONE)); ADD_PAIR("line", Tcl_NewWideIntObj(framePtr->line[0])); ADD_PAIR("file", framePtr->data.eval.path); @@ -1404,7 +1404,7 @@ TclInfoFrame( */ for (i=0 ; ilength ; i++) { - lv[lc++] = Tcl_NewStringObj(efiPtr->fields[i].name, -1); + lv[lc++] = Tcl_NewStringObj(efiPtr->fields[i].name, TCL_INDEX_NONE); if (efiPtr->fields[i].proc) { lv[lc++] = efiPtr->fields[i].proc(efiPtr->fields[i].clientData); @@ -1492,7 +1492,7 @@ InfoFunctionsCmd( " }\n" " }\n" " ::return $cmds\n" -" } [::namespace current]] ", -1); +" } [::namespace current]] ", TCL_INDEX_NONE); if (objc == 2) { Tcl_Obj *arg = Tcl_NewListObj(1, &(objv[1])); @@ -1545,12 +1545,12 @@ InfoHostnameCmd( name = Tcl_GetHostName(); if (name) { - Tcl_SetObjResult(interp, Tcl_NewStringObj(name, -1)); + Tcl_SetObjResult(interp, Tcl_NewStringObj(name, TCL_INDEX_NONE)); return TCL_OK; } Tcl_SetObjResult(interp, Tcl_NewStringObj( - "unable to determine name of host", -1)); + "unable to determine name of host", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "OPERATION", "HOSTNAME", "UNKNOWN", NULL); return TCL_ERROR; } @@ -1665,12 +1665,12 @@ InfoLibraryCmd( libDirName = Tcl_GetVar2(interp, "tcl_library", NULL, TCL_GLOBAL_ONLY); if (libDirName != NULL) { - Tcl_SetObjResult(interp, Tcl_NewStringObj(libDirName, -1)); + Tcl_SetObjResult(interp, Tcl_NewStringObj(libDirName, TCL_INDEX_NONE)); return TCL_OK; } Tcl_SetObjResult(interp, Tcl_NewStringObj( - "no library has been specified for Tcl", -1)); + "no library has been specified for Tcl", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "LOOKUP", "VARIABLE", "tcl_library",NULL); return TCL_ERROR; } @@ -1797,7 +1797,7 @@ InfoPatchLevelCmd( patchlevel = Tcl_GetVar2(interp, "tcl_patchLevel", NULL, (TCL_GLOBAL_ONLY | TCL_LEAVE_ERR_MSG)); if (patchlevel != NULL) { - Tcl_SetObjResult(interp, Tcl_NewStringObj(patchlevel, -1)); + Tcl_SetObjResult(interp, Tcl_NewStringObj(patchlevel, TCL_INDEX_NONE)); return TCL_OK; } return TCL_ERROR; @@ -1910,7 +1910,7 @@ InfoProcsCmd( Tcl_GetCommandFullName(interp, (Tcl_Command) cmdPtr, elemObjPtr); } else { - elemObjPtr = Tcl_NewStringObj(simplePattern, -1); + elemObjPtr = Tcl_NewStringObj(simplePattern, TCL_INDEX_NONE); } Tcl_ListObjAppendElement(interp, listPtr, elemObjPtr); } @@ -1938,7 +1938,7 @@ InfoProcsCmd( Tcl_GetCommandFullName(interp, (Tcl_Command) cmdPtr, elemObjPtr); } else { - elemObjPtr = Tcl_NewStringObj(cmdName, -1); + elemObjPtr = Tcl_NewStringObj(cmdName, TCL_INDEX_NONE); } Tcl_ListObjAppendElement(interp, listPtr, elemObjPtr); } @@ -1977,7 +1977,7 @@ InfoProcsCmd( if (TclIsProc(cmdPtr) || ((realCmdPtr != NULL) && TclIsProc(realCmdPtr))) { Tcl_ListObjAppendElement(interp, listPtr, - Tcl_NewStringObj(cmdName, -1)); + Tcl_NewStringObj(cmdName, TCL_INDEX_NONE)); } } } @@ -2075,7 +2075,7 @@ InfoSharedlibCmd( } #ifdef TCL_SHLIB_EXT - Tcl_SetObjResult(interp, Tcl_NewStringObj(TCL_SHLIB_EXT, -1)); + Tcl_SetObjResult(interp, Tcl_NewStringObj(TCL_SHLIB_EXT, TCL_INDEX_NONE)); #endif return TCL_OK; } @@ -2172,7 +2172,7 @@ InfoCmdTypeCmd( Tcl_AppendResult(interp, "native", NULL); } else { Tcl_SetObjResult(interp, - Tcl_NewStringObj(TclGetCommandTypeName(command), -1)); + Tcl_NewStringObj(TclGetCommandTypeName(command), TCL_INDEX_NONE)); } return TCL_OK; } @@ -2652,7 +2652,7 @@ Tcl_LpopObjCmd( if (!listLen) { /* empty list, throw the same error as with index "end" */ Tcl_SetObjResult(interp, Tcl_NewStringObj( - "index \"end\" out of range", -1)); + "index \"end\" out of range", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "VALUE", "INDEX" "OUTOFRANGE", NULL); return TCL_ERROR; @@ -3374,7 +3374,7 @@ Tcl_LsearchObjCmd( } if (i + 4 > (size_t)objc) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "missing starting index", -1)); + "missing starting index", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "ARGUMENT", "MISSING", NULL); result = TCL_ERROR; goto done; @@ -3398,7 +3398,7 @@ Tcl_LsearchObjCmd( if (i + 4 > (size_t)objc) { Tcl_SetObjResult(interp, Tcl_NewStringObj( "\"-stride\" option must be " - "followed by stride length", -1)); + "followed by stride length", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "ARGUMENT", "MISSING", NULL); result = TCL_ERROR; goto done; @@ -3409,7 +3409,7 @@ Tcl_LsearchObjCmd( } if (wide < 1) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "stride length must be at least 1", -1)); + "stride length must be at least 1", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "OPERATION", "LSEARCH", "BADSTRIDE", NULL); result = TCL_ERROR; @@ -3499,7 +3499,7 @@ Tcl_LsearchObjCmd( if (returnSubindices && sortInfo.indexc==0) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "-subindices cannot be used without -index option", -1)); + "-subindices cannot be used without -index option", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "OPERATION", "LSEARCH", "BAD_OPTION_MIX", NULL); result = TCL_ERROR; @@ -3508,7 +3508,7 @@ Tcl_LsearchObjCmd( if (bisect && (allMatches || negatedMatch)) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "-bisect is not compatible with -all or -not", -1)); + "-bisect is not compatible with -all or -not", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "OPERATION", "LSEARCH", "BAD_OPTION_MIX", NULL); result = TCL_ERROR; @@ -3578,7 +3578,7 @@ Tcl_LsearchObjCmd( if (groupOffset >= groupSize) { Tcl_SetObjResult(interp, Tcl_NewStringObj( "when used with \"-stride\", the leading \"-index\"" - " value must be within the group", -1)); + " value must be within the group", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "OPERATION", "LSEARCH", "BADINDEX", NULL); result = TCL_ERROR; @@ -4551,7 +4551,7 @@ Tcl_LsortObjCmd( if (i + 2 == (size_t)objc) { Tcl_SetObjResult(interp, Tcl_NewStringObj( "\"-command\" option must be followed " - "by comparison command", -1)); + "by comparison command", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "ARGUMENT", "MISSING", NULL); sortInfo.resultCode = TCL_ERROR; goto done; @@ -4638,7 +4638,7 @@ Tcl_LsortObjCmd( if (i + 2 == (size_t)objc) { Tcl_SetObjResult(interp, Tcl_NewStringObj( "\"-stride\" option must be " - "followed by stride length", -1)); + "followed by stride length", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "ARGUMENT", "MISSING", NULL); sortInfo.resultCode = TCL_ERROR; goto done; @@ -4649,7 +4649,7 @@ Tcl_LsortObjCmd( } if (wide < 2) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "stride length must be at least 2", -1)); + "stride length must be at least 2", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "OPERATION", "LSORT", "BADSTRIDE", NULL); sortInfo.resultCode = TCL_ERROR; @@ -4771,7 +4771,7 @@ Tcl_LsortObjCmd( if (groupOffset >= groupSize) { Tcl_SetObjResult(interp, Tcl_NewStringObj( "when used with \"-stride\", the leading \"-index\"" - " value must be within the group", -1)); + " value must be within the group", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "OPERATION", "LSORT", "BADINDEX", NULL); sortInfo.resultCode = TCL_ERROR; @@ -5298,7 +5298,7 @@ SortCompare( if (TclGetIntFromObj(infoPtr->interp, Tcl_GetObjResult(infoPtr->interp), &order) != TCL_OK) { Tcl_SetObjResult(infoPtr->interp, Tcl_NewStringObj( - "-compare command returned non-integer result", -1)); + "-compare command returned non-integer result", TCL_INDEX_NONE)); Tcl_SetErrorCode(infoPtr->interp, "TCL", "OPERATION", "LSORT", "COMPARISONFAILED", NULL); infoPtr->resultCode = TCL_ERROR; diff --git a/generic/tclCmdMZ.c b/generic/tclCmdMZ.c index f497f59..77c8cb4 100644 --- a/generic/tclCmdMZ.c +++ b/generic/tclCmdMZ.c @@ -227,7 +227,7 @@ Tcl_RegexpObjCmd( if (doinline && ((objc - 2) != 0)) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "regexp match variables not allowed when using -inline", -1)); + "regexp match variables not allowed when using -inline", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "OPERATION", "REGEXP", "MIX_VAR_INLINE", NULL); goto optionError; @@ -1695,7 +1695,7 @@ StringIsCmd( goto str_is_done; } end = string1 + length1; - if (TclParseNumber(NULL, objPtr, NULL, NULL, -1, + if (TclParseNumber(NULL, objPtr, NULL, NULL, TCL_INDEX_NONE, (const char **) &stop, 0) != TCL_OK) { result = 0; failat = 0; @@ -1725,7 +1725,7 @@ StringIsCmd( goto str_is_done; } end = string1 + length1; - if (TclParseNumber(NULL, objPtr, NULL, NULL, -1, + if (TclParseNumber(NULL, objPtr, NULL, NULL, TCL_INDEX_NONE, (const char **) &stop, TCL_PARSE_INTEGER_ONLY) == TCL_OK) { if (stop == end) { /* @@ -1776,7 +1776,7 @@ StringIsCmd( break; } end = string1 + length1; - if (TclParseNumber(NULL, objPtr, NULL, NULL, -1, + if (TclParseNumber(NULL, objPtr, NULL, NULL, TCL_INDEX_NONE, (const char **) &stop, TCL_PARSE_INTEGER_ONLY) == TCL_OK) { if (stop == end) { /* @@ -2047,7 +2047,7 @@ StringMapCmd( */ Tcl_SetObjResult(interp, - Tcl_NewStringObj("char map list unbalanced", -1)); + Tcl_NewStringObj("char map list unbalanced", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "OPERATION", "MAP", "UNBALANCED", NULL); return TCL_ERROR; @@ -2933,7 +2933,7 @@ StringLowerCmd( length2 = Tcl_UtfToLower(string2); Tcl_SetObjLength(resultPtr, length2 + (start - string1)); - Tcl_AppendToObj(resultPtr, end, -1); + Tcl_AppendToObj(resultPtr, end, TCL_INDEX_NONE); Tcl_SetObjResult(interp, resultPtr); } @@ -3018,7 +3018,7 @@ StringUpperCmd( length2 = Tcl_UtfToUpper(string2); Tcl_SetObjLength(resultPtr, length2 + (start - string1)); - Tcl_AppendToObj(resultPtr, end, -1); + Tcl_AppendToObj(resultPtr, end, TCL_INDEX_NONE); Tcl_SetObjResult(interp, resultPtr); } @@ -3103,7 +3103,7 @@ StringTitleCmd( length2 = Tcl_UtfToTitle(string2); Tcl_SetObjLength(resultPtr, length2 + (start - string1)); - Tcl_AppendToObj(resultPtr, end, -1); + Tcl_AppendToObj(resultPtr, end, TCL_INDEX_NONE); Tcl_SetObjResult(interp, resultPtr); } @@ -3612,7 +3612,7 @@ TclNRSwitchObjCmd( if (objc % 2) { Tcl_ResetResult(interp); Tcl_SetObjResult(interp, Tcl_NewStringObj( - "extra switch pattern with no body", -1)); + "extra switch pattern with no body", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "OPERATION", "SWITCH", "BADARM", NULL); @@ -3630,7 +3630,7 @@ TclNRSwitchObjCmd( Tcl_AppendToObj(Tcl_GetObjResult(interp), ", this may be due to a comment incorrectly" " placed outside of a switch body - see the" - " \"switch\" documentation", -1); + " \"switch\" documentation", TCL_INDEX_NONE); Tcl_SetErrorCode(interp, "TCL", "OPERATION", "SWITCH", "BADARM", "COMMENT?", NULL); break; @@ -3980,7 +3980,7 @@ Tcl_ThrowObjCmd( return TCL_ERROR; } else if (len < 1) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "type must be non-empty list", -1)); + "type must be non-empty list", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "OPERATION", "THROW", "BADEXCEPTION", NULL); return TCL_ERROR; @@ -4718,7 +4718,7 @@ TclNRTryObjCmd( case TryFinally: /* finally script */ if (i < objc-2) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "finally clause must be last", -1)); + "finally clause must be last", TCL_INDEX_NONE)); Tcl_DecrRefCount(handlersObj); Tcl_SetErrorCode(interp, "TCL", "OPERATION", "TRY", "FINALLY", "NONTERMINAL", NULL); @@ -4726,7 +4726,7 @@ TclNRTryObjCmd( } else if (i == objc-1) { Tcl_SetObjResult(interp, Tcl_NewStringObj( "wrong # args to finally clause: must be" - " \"... finally script\"", -1)); + " \"... finally script\"", TCL_INDEX_NONE)); Tcl_DecrRefCount(handlersObj); Tcl_SetErrorCode(interp, "TCL", "OPERATION", "TRY", "FINALLY", "ARGUMENT", NULL); @@ -4739,7 +4739,7 @@ TclNRTryObjCmd( if (i > objc-4) { Tcl_SetObjResult(interp, Tcl_NewStringObj( "wrong # args to on clause: must be \"... on code" - " variableList script\"", -1)); + " variableList script\"", TCL_INDEX_NONE)); Tcl_DecrRefCount(handlersObj); Tcl_SetErrorCode(interp, "TCL", "OPERATION", "TRY", "ON", "ARGUMENT", NULL); @@ -4800,7 +4800,7 @@ TclNRTryObjCmd( } if (bodyShared) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "last non-finally clause must not have a body of \"-\"", -1)); + "last non-finally clause must not have a body of \"-\"", TCL_INDEX_NONE)); Tcl_DecrRefCount(handlersObj); Tcl_SetErrorCode(interp, "TCL", "OPERATION", "TRY", "BADFALLTHROUGH", NULL); diff --git a/generic/tclCompExpr.c b/generic/tclCompExpr.c index b7bcf7c..c503304 100644 --- a/generic/tclCompExpr.c +++ b/generic/tclCompExpr.c @@ -798,14 +798,14 @@ ParseExpr( switch (start[1]) { case 'b': Tcl_AppendToObj(post, - " (invalid binary number?)", -1); + " (invalid binary number?)", TCL_INDEX_NONE); parsePtr->errorType = TCL_PARSE_BAD_NUMBER; errCode = "BADNUMBER"; subErrCode = "BINARY"; break; case 'o': Tcl_AppendToObj(post, - " (invalid octal number?)", -1); + " (invalid octal number?)", TCL_INDEX_NONE); parsePtr->errorType = TCL_PARSE_BAD_NUMBER; errCode = "BADNUMBER"; subErrCode = "OCTAL"; @@ -813,7 +813,7 @@ ParseExpr( default: if (isdigit(UCHAR(start[1]))) { Tcl_AppendToObj(post, - " (invalid octal number?)", -1); + " (invalid octal number?)", TCL_INDEX_NONE); parsePtr->errorType = TCL_PARSE_BAD_NUMBER; errCode = "BADNUMBER"; subErrCode = "OCTAL"; @@ -1462,7 +1462,7 @@ ParseExpr( */ if (post != NULL) { - Tcl_AppendToObj(msg, ";\n", -1); + Tcl_AppendToObj(msg, ";\n", TCL_INDEX_NONE); Tcl_AppendObjToObj(msg, post); Tcl_DecrRefCount(post); } diff --git a/generic/tclDisassemble.c b/generic/tclDisassemble.c index 57adcf0..c06731f 100644 --- a/generic/tclDisassemble.c +++ b/generic/tclDisassemble.c @@ -280,7 +280,7 @@ DisassembleByteCodeObj( Tcl_AppendPrintfToObj(bufferObj, "ByteCode %p, refCt %" TCL_Z_MODIFIER "u, epoch %" TCL_Z_MODIFIER "u, interp %p (epoch %" TCL_Z_MODIFIER "u)\n", codePtr, codePtr->refCount, codePtr->compileEpoch, iPtr, iPtr->compileEpoch); - Tcl_AppendToObj(bufferObj, " Source ", -1); + Tcl_AppendToObj(bufferObj, " Source ", TCL_INDEX_NONE); PrintSourceToObj(bufferObj, codePtr->source, TclMin(codePtr->numSrcBytes, 55)); GetLocationInformation(codePtr->procPtr, &fileObj, &line); @@ -339,7 +339,7 @@ DisassembleByteCodeObj( (localPtr->flags & VAR_TEMPORARY) ? ", temp" : "", (localPtr->flags & VAR_RESOLVED) ? ", resolved" : ""); if (TclIsVarTemporary(localPtr)) { - Tcl_AppendToObj(bufferObj, "\n", -1); + Tcl_AppendToObj(bufferObj, "\n", TCL_INDEX_NONE); } else { Tcl_AppendPrintfToObj(bufferObj, ", \"%s\"\n", localPtr->name); @@ -389,7 +389,7 @@ DisassembleByteCodeObj( if (numCmds == 0) { pc = codeStart; while (pc < codeLimit) { - Tcl_AppendToObj(bufferObj, " ", -1); + Tcl_AppendToObj(bufferObj, " ", TCL_INDEX_NONE); pc += FormatInstruction(codePtr, pc, bufferObj); } return bufferObj; @@ -451,7 +451,7 @@ DisassembleByteCodeObj( srcOffset, (srcOffset + srcLen - 1)); } if (numCmds > 0) { - Tcl_AppendToObj(bufferObj, "\n", -1); + Tcl_AppendToObj(bufferObj, "\n", TCL_INDEX_NONE); } /* @@ -500,14 +500,14 @@ DisassembleByteCodeObj( */ while ((pc-codeStart) < codeOffset) { - Tcl_AppendToObj(bufferObj, " ", -1); + Tcl_AppendToObj(bufferObj, " ", TCL_INDEX_NONE); pc += FormatInstruction(codePtr, pc, bufferObj); } Tcl_AppendPrintfToObj(bufferObj, " Command %d: ", i+1); PrintSourceToObj(bufferObj, (codePtr->source + srcOffset), TclMin(srcLen, 55)); - Tcl_AppendToObj(bufferObj, "\n", -1); + Tcl_AppendToObj(bufferObj, "\n", TCL_INDEX_NONE); } if (pc < codeLimit) { /* @@ -515,7 +515,7 @@ DisassembleByteCodeObj( */ while (pc < codeLimit) { - Tcl_AppendToObj(bufferObj, " ", -1); + Tcl_AppendToObj(bufferObj, " ", TCL_INDEX_NONE); pc += FormatInstruction(codePtr, pc, bufferObj); } } @@ -654,7 +654,7 @@ FormatInstruction( const char *bytes; size_t length; - Tcl_AppendToObj(bufferObj, "\t# ", -1); + Tcl_AppendToObj(bufferObj, "\t# ", TCL_INDEX_NONE); bytes = Tcl_GetStringFromObj(codePtr->objArrayPtr[opnd], &length); PrintSourceToObj(bufferObj, bytes, TclMin(length, 40)); } else if (suffixBuffer[0]) { @@ -663,12 +663,12 @@ FormatInstruction( PrintSourceToObj(bufferObj, suffixSrc, 40); } } - Tcl_AppendToObj(bufferObj, "\n", -1); + Tcl_AppendToObj(bufferObj, "\n", TCL_INDEX_NONE); if (auxPtr && auxPtr->type->printProc) { - Tcl_AppendToObj(bufferObj, "\t\t[", -1); + Tcl_AppendToObj(bufferObj, "\t\t[", TCL_INDEX_NONE); auxPtr->type->printProc(auxPtr->clientData, bufferObj, codePtr, pcOffset); - Tcl_AppendToObj(bufferObj, "]\n", -1); + Tcl_AppendToObj(bufferObj, "]\n", TCL_INDEX_NONE); } return numBytes; } @@ -866,11 +866,11 @@ PrintSourceToObj( size_t i = 0, len; if (stringPtr == NULL) { - Tcl_AppendToObj(appendObj, "\"\"", -1); + Tcl_AppendToObj(appendObj, "\"\"", TCL_INDEX_NONE); return; } - Tcl_AppendToObj(appendObj, "\"", -1); + Tcl_AppendToObj(appendObj, "\"", TCL_INDEX_NONE); p = stringPtr; for (; (*p != '\0') && (i < maxChars); p+=len) { int ucs4; @@ -878,27 +878,27 @@ PrintSourceToObj( len = TclUtfToUCS4(p, &ucs4); switch (ucs4) { case '"': - Tcl_AppendToObj(appendObj, "\\\"", -1); + Tcl_AppendToObj(appendObj, "\\\"", TCL_INDEX_NONE); i += 2; continue; case '\f': - Tcl_AppendToObj(appendObj, "\\f", -1); + Tcl_AppendToObj(appendObj, "\\f", TCL_INDEX_NONE); i += 2; continue; case '\n': - Tcl_AppendToObj(appendObj, "\\n", -1); + Tcl_AppendToObj(appendObj, "\\n", TCL_INDEX_NONE); i += 2; continue; case '\r': - Tcl_AppendToObj(appendObj, "\\r", -1); + Tcl_AppendToObj(appendObj, "\\r", TCL_INDEX_NONE); i += 2; continue; case '\t': - Tcl_AppendToObj(appendObj, "\\t", -1); + Tcl_AppendToObj(appendObj, "\\t", TCL_INDEX_NONE); i += 2; continue; case '\v': - Tcl_AppendToObj(appendObj, "\\v", -1); + Tcl_AppendToObj(appendObj, "\\v", TCL_INDEX_NONE); i += 2; continue; default: @@ -916,9 +916,9 @@ PrintSourceToObj( } } if (*p != '\0') { - Tcl_AppendToObj(appendObj, "...", -1); + Tcl_AppendToObj(appendObj, "...", TCL_INDEX_NONE); } - Tcl_AppendToObj(appendObj, "\"", -1); + Tcl_AppendToObj(appendObj, "\"", TCL_INDEX_NONE); } /* @@ -972,33 +972,33 @@ DisassembleByteCodeAsDicts( TclNewObj(descriptor[0]); if (!(localPtr->flags & (VAR_ARRAY|VAR_LINK))) { Tcl_ListObjAppendElement(NULL, descriptor[0], - Tcl_NewStringObj("scalar", -1)); + Tcl_NewStringObj("scalar", TCL_INDEX_NONE)); } if (localPtr->flags & VAR_ARRAY) { Tcl_ListObjAppendElement(NULL, descriptor[0], - Tcl_NewStringObj("array", -1)); + Tcl_NewStringObj("array", TCL_INDEX_NONE)); } if (localPtr->flags & VAR_LINK) { Tcl_ListObjAppendElement(NULL, descriptor[0], - Tcl_NewStringObj("link", -1)); + Tcl_NewStringObj("link", TCL_INDEX_NONE)); } if (localPtr->flags & VAR_ARGUMENT) { Tcl_ListObjAppendElement(NULL, descriptor[0], - Tcl_NewStringObj("arg", -1)); + Tcl_NewStringObj("arg", TCL_INDEX_NONE)); } if (localPtr->flags & VAR_TEMPORARY) { Tcl_ListObjAppendElement(NULL, descriptor[0], - Tcl_NewStringObj("temp", -1)); + Tcl_NewStringObj("temp", TCL_INDEX_NONE)); } if (localPtr->flags & VAR_RESOLVED) { Tcl_ListObjAppendElement(NULL, descriptor[0], - Tcl_NewStringObj("resolved", -1)); + Tcl_NewStringObj("resolved", TCL_INDEX_NONE)); } if (localPtr->flags & VAR_TEMPORARY) { Tcl_ListObjAppendElement(NULL, variables, Tcl_NewListObj(1, descriptor)); } else { - descriptor[1] = Tcl_NewStringObj(localPtr->name, -1); + descriptor[1] = Tcl_NewStringObj(localPtr->name, TCL_INDEX_NONE); Tcl_ListObjAppendElement(NULL, variables, Tcl_NewListObj(2, descriptor)); } @@ -1016,7 +1016,7 @@ DisassembleByteCodeAsDicts( TclNewObj(inst); Tcl_ListObjAppendElement(NULL, inst, Tcl_NewStringObj( - instDesc->name, -1)); + instDesc->name, TCL_INDEX_NONE)); opnd = pc + 1; for (i=0 ; inumOperands ; i++) { switch (instDesc->opTypes[i]) { @@ -1082,7 +1082,7 @@ DisassembleByteCodeAsDicts( ".%d", val)); } else if (val == -2) { Tcl_ListObjAppendElement(NULL, inst, Tcl_NewStringObj( - ".end", -1)); + ".end", TCL_INDEX_NONE)); } else { Tcl_ListObjAppendElement(NULL, inst, Tcl_ObjPrintf( ".end-%d", -2-val)); @@ -1115,13 +1115,13 @@ DisassembleByteCodeAsDicts( TclNewObj(aux); for (i=0 ; i<(int)codePtr->numAuxDataItems ; i++) { AuxData *auxData = &codePtr->auxDataArrayPtr[i]; - Tcl_Obj *auxDesc = Tcl_NewStringObj(auxData->type->name, -1); + Tcl_Obj *auxDesc = Tcl_NewStringObj(auxData->type->name, TCL_INDEX_NONE); if (auxData->type->disassembleProc) { Tcl_Obj *desc; TclNewObj(desc); - Tcl_DictObjPut(NULL, desc, Tcl_NewStringObj("name", -1), auxDesc); + Tcl_DictObjPut(NULL, desc, Tcl_NewStringObj("name", TCL_INDEX_NONE), auxDesc); auxDesc = desc; auxData->type->disassembleProc(auxData->clientData, auxDesc, codePtr, 0); @@ -1188,9 +1188,9 @@ DisassembleByteCodeAsDicts( sourceOffset += Decode(srcOffPtr); sourceLength = Decode(srcLenPtr); TclNewObj(cmd); - Tcl_DictObjPut(NULL, cmd, Tcl_NewStringObj("codefrom", -1), + Tcl_DictObjPut(NULL, cmd, Tcl_NewStringObj("codefrom", TCL_INDEX_NONE), Tcl_NewWideIntObj(codeOffset)); - Tcl_DictObjPut(NULL, cmd, Tcl_NewStringObj("codeto", -1), + Tcl_DictObjPut(NULL, cmd, Tcl_NewStringObj("codeto", TCL_INDEX_NONE), Tcl_NewWideIntObj(codeOffset + codeLength - 1)); /* @@ -1198,13 +1198,13 @@ DisassembleByteCodeAsDicts( * characters are present in the source! */ - Tcl_DictObjPut(NULL, cmd, Tcl_NewStringObj("scriptfrom", -1), + Tcl_DictObjPut(NULL, cmd, Tcl_NewStringObj("scriptfrom", TCL_INDEX_NONE), Tcl_NewWideIntObj(Tcl_NumUtfChars(codePtr->source, sourceOffset))); - Tcl_DictObjPut(NULL, cmd, Tcl_NewStringObj("scriptto", -1), + Tcl_DictObjPut(NULL, cmd, Tcl_NewStringObj("scriptto", TCL_INDEX_NONE), Tcl_NewWideIntObj(Tcl_NumUtfChars(codePtr->source, sourceOffset + sourceLength - 1))); - Tcl_DictObjPut(NULL, cmd, Tcl_NewStringObj("script", -1), + Tcl_DictObjPut(NULL, cmd, Tcl_NewStringObj("script", TCL_INDEX_NONE), Tcl_NewStringObj(codePtr->source+sourceOffset, sourceLength)); Tcl_ListObjAppendElement(NULL, commands, cmd); } @@ -1223,32 +1223,32 @@ DisassembleByteCodeAsDicts( */ TclNewObj(description); - Tcl_DictObjPut(NULL, description, Tcl_NewStringObj("literals", -1), + Tcl_DictObjPut(NULL, description, Tcl_NewStringObj("literals", TCL_INDEX_NONE), literals); - Tcl_DictObjPut(NULL, description, Tcl_NewStringObj("variables", -1), + Tcl_DictObjPut(NULL, description, Tcl_NewStringObj("variables", TCL_INDEX_NONE), variables); - Tcl_DictObjPut(NULL, description, Tcl_NewStringObj("exception", -1), exn); - Tcl_DictObjPut(NULL, description, Tcl_NewStringObj("instructions", -1), + Tcl_DictObjPut(NULL, description, Tcl_NewStringObj("exception", TCL_INDEX_NONE), exn); + Tcl_DictObjPut(NULL, description, Tcl_NewStringObj("instructions", TCL_INDEX_NONE), instructions); - Tcl_DictObjPut(NULL, description, Tcl_NewStringObj("auxiliary", -1), aux); - Tcl_DictObjPut(NULL, description, Tcl_NewStringObj("commands", -1), + Tcl_DictObjPut(NULL, description, Tcl_NewStringObj("auxiliary", TCL_INDEX_NONE), aux); + Tcl_DictObjPut(NULL, description, Tcl_NewStringObj("commands", TCL_INDEX_NONE), commands); - Tcl_DictObjPut(NULL, description, Tcl_NewStringObj("script", -1), + Tcl_DictObjPut(NULL, description, Tcl_NewStringObj("script", TCL_INDEX_NONE), Tcl_NewStringObj(codePtr->source, codePtr->numSrcBytes)); - Tcl_DictObjPut(NULL, description, Tcl_NewStringObj("namespace", -1), - Tcl_NewStringObj(codePtr->nsPtr->fullName, -1)); - Tcl_DictObjPut(NULL, description, Tcl_NewStringObj("stackdepth", -1), + Tcl_DictObjPut(NULL, description, Tcl_NewStringObj("namespace", TCL_INDEX_NONE), + Tcl_NewStringObj(codePtr->nsPtr->fullName, TCL_INDEX_NONE)); + Tcl_DictObjPut(NULL, description, Tcl_NewStringObj("stackdepth", TCL_INDEX_NONE), Tcl_NewWideIntObj(codePtr->maxStackDepth)); - Tcl_DictObjPut(NULL, description, Tcl_NewStringObj("exceptdepth", -1), + Tcl_DictObjPut(NULL, description, Tcl_NewStringObj("exceptdepth", TCL_INDEX_NONE), Tcl_NewWideIntObj(codePtr->maxExceptDepth)); if (line >= 0) { Tcl_DictObjPut(NULL, description, - Tcl_NewStringObj("initiallinenumber", -1), + Tcl_NewStringObj("initiallinenumber", TCL_INDEX_NONE), Tcl_NewWideIntObj(line)); } if (file) { Tcl_DictObjPut(NULL, description, - Tcl_NewStringObj("sourcefile", -1), file); + Tcl_NewStringObj("sourcefile", TCL_INDEX_NONE), file); } return description; } @@ -1410,7 +1410,7 @@ Tcl_DisassembleObjCmd( procPtr = TclOOGetProcFromMethod(methodPtr); if (procPtr == NULL) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "body not available for this kind of constructor", -1)); + "body not available for this kind of constructor", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "OPERATION", "DISASSEMBLE", "METHODTYPE", NULL); return TCL_ERROR; @@ -1475,7 +1475,7 @@ Tcl_DisassembleObjCmd( procPtr = TclOOGetProcFromMethod(methodPtr); if (procPtr == NULL) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "body not available for this kind of destructor", -1)); + "body not available for this kind of destructor", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "OPERATION", "DISASSEMBLE", "METHODTYPE", NULL); return TCL_ERROR; @@ -1565,7 +1565,7 @@ Tcl_DisassembleObjCmd( procPtr = TclOOGetProcFromMethod((Method *)Tcl_GetHashValue(hPtr)); if (procPtr == NULL) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "body not available for this kind of method", -1)); + "body not available for this kind of method", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "OPERATION", "DISASSEMBLE", "METHODTYPE", NULL); return TCL_ERROR; @@ -1602,7 +1602,7 @@ Tcl_DisassembleObjCmd( if (codePtr->flags & TCL_BYTECODE_PRECOMPILED) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "may not disassemble prebuilt bytecode", -1)); + "may not disassemble prebuilt bytecode", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "OPERATION", "DISASSEMBLE", "BYTECODE", NULL); return TCL_ERROR; diff --git a/generic/tclEnsemble.c b/generic/tclEnsemble.c index a84b188..98f4ae0 100644 --- a/generic/tclEnsemble.c +++ b/generic/tclEnsemble.c @@ -125,7 +125,7 @@ NewNsObj( if (namespacePtr == TclGetGlobalNamespace(nsPtr->interp)) { return Tcl_NewStringObj("::", 2); } - return Tcl_NewStringObj(nsPtr->fullName, -1); + return Tcl_NewStringObj(nsPtr->fullName, TCL_INDEX_NONE); } /* @@ -289,7 +289,7 @@ TclNamespaceEnsembleCmd( if (len < 1) { Tcl_SetObjResult(interp, Tcl_NewStringObj( "ensemble subcommand implementations " - "must be non-empty lists", -1)); + "must be non-empty lists", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "ENSEMBLE", "EMPTY_TARGET", NULL); Tcl_DictObjDone(&search); @@ -460,7 +460,7 @@ TclNamespaceEnsembleCmd( /* -map option */ Tcl_ListObjAppendElement(NULL, resultObj, - Tcl_NewStringObj(ensembleConfigOptions[CONF_MAP], -1)); + Tcl_NewStringObj(ensembleConfigOptions[CONF_MAP], TCL_INDEX_NONE)); Tcl_GetEnsembleMappingDict(NULL, token, &tmpObj); Tcl_ListObjAppendElement(NULL, resultObj, (tmpObj != NULL) ? tmpObj : Tcl_NewObj()); @@ -475,14 +475,14 @@ TclNamespaceEnsembleCmd( /* -parameters option */ Tcl_ListObjAppendElement(NULL, resultObj, - Tcl_NewStringObj(ensembleConfigOptions[CONF_PARAM], -1)); + Tcl_NewStringObj(ensembleConfigOptions[CONF_PARAM], TCL_INDEX_NONE)); Tcl_GetEnsembleParameterList(NULL, token, &tmpObj); Tcl_ListObjAppendElement(NULL, resultObj, (tmpObj != NULL) ? tmpObj : Tcl_NewObj()); /* -prefix option */ Tcl_ListObjAppendElement(NULL, resultObj, - Tcl_NewStringObj(ensembleConfigOptions[CONF_PREFIX], -1)); + Tcl_NewStringObj(ensembleConfigOptions[CONF_PREFIX], TCL_INDEX_NONE)); Tcl_GetEnsembleFlags(NULL, token, &flags); Tcl_ListObjAppendElement(NULL, resultObj, Tcl_NewBooleanObj(flags & TCL_ENSEMBLE_PREFIX)); @@ -577,7 +577,7 @@ TclNamespaceEnsembleCmd( if (len < 1) { Tcl_SetObjResult(interp, Tcl_NewStringObj( "ensemble subcommand implementations " - "must be non-empty lists", -1)); + "must be non-empty lists", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "ENSEMBLE", "EMPTY_TARGET", NULL); Tcl_DictObjDone(&search); @@ -625,7 +625,7 @@ TclNamespaceEnsembleCmd( } case CONF_NAMESPACE: Tcl_SetObjResult(interp, Tcl_NewStringObj( - "option -namespace is read-only", -1)); + "option -namespace is read-only", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "ENSEMBLE", "READ_ONLY", NULL); goto freeMapAndError; @@ -798,7 +798,7 @@ Tcl_SetEnsembleSubcommandList( if (cmdPtr->objProc != TclEnsembleImplementationCmd) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "command is not an ensemble", -1)); + "command is not an ensemble", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "ENSEMBLE", "NOT_ENSEMBLE", NULL); return TCL_ERROR; } @@ -874,7 +874,7 @@ Tcl_SetEnsembleParameterList( if (cmdPtr->objProc != TclEnsembleImplementationCmd) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "command is not an ensemble", -1)); + "command is not an ensemble", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "ENSEMBLE", "NOT_ENSEMBLE", NULL); return TCL_ERROR; } @@ -950,7 +950,7 @@ Tcl_SetEnsembleMappingDict( if (cmdPtr->objProc != TclEnsembleImplementationCmd) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "command is not an ensemble", -1)); + "command is not an ensemble", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "ENSEMBLE", "NOT_ENSEMBLE", NULL); return TCL_ERROR; } @@ -1050,7 +1050,7 @@ Tcl_SetEnsembleUnknownHandler( if (cmdPtr->objProc != TclEnsembleImplementationCmd) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "command is not an ensemble", -1)); + "command is not an ensemble", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "ENSEMBLE", "NOT_ENSEMBLE", NULL); return TCL_ERROR; } @@ -1116,7 +1116,7 @@ Tcl_SetEnsembleFlags( if (cmdPtr->objProc != TclEnsembleImplementationCmd) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "command is not an ensemble", -1)); + "command is not an ensemble", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "ENSEMBLE", "NOT_ENSEMBLE", NULL); return TCL_ERROR; } @@ -1193,7 +1193,7 @@ Tcl_GetEnsembleSubcommandList( if (cmdPtr->objProc != TclEnsembleImplementationCmd) { if (interp != NULL) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "command is not an ensemble", -1)); + "command is not an ensemble", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "ENSEMBLE", "NOT_ENSEMBLE", NULL); } return TCL_ERROR; @@ -1235,7 +1235,7 @@ Tcl_GetEnsembleParameterList( if (cmdPtr->objProc != TclEnsembleImplementationCmd) { if (interp != NULL) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "command is not an ensemble", -1)); + "command is not an ensemble", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "ENSEMBLE", "NOT_ENSEMBLE", NULL); } return TCL_ERROR; @@ -1277,7 +1277,7 @@ Tcl_GetEnsembleMappingDict( if (cmdPtr->objProc != TclEnsembleImplementationCmd) { if (interp != NULL) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "command is not an ensemble", -1)); + "command is not an ensemble", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "ENSEMBLE", "NOT_ENSEMBLE", NULL); } return TCL_ERROR; @@ -1318,7 +1318,7 @@ Tcl_GetEnsembleUnknownHandler( if (cmdPtr->objProc != TclEnsembleImplementationCmd) { if (interp != NULL) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "command is not an ensemble", -1)); + "command is not an ensemble", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "ENSEMBLE", "NOT_ENSEMBLE", NULL); } return TCL_ERROR; @@ -1359,7 +1359,7 @@ Tcl_GetEnsembleFlags( if (cmdPtr->objProc != TclEnsembleImplementationCmd) { if (interp != NULL) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "command is not an ensemble", -1)); + "command is not an ensemble", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "ENSEMBLE", "NOT_ENSEMBLE", NULL); } return TCL_ERROR; @@ -1400,7 +1400,7 @@ Tcl_GetEnsembleNamespace( if (cmdPtr->objProc != TclEnsembleImplementationCmd) { if (interp != NULL) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "command is not an ensemble", -1)); + "command is not an ensemble", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "ENSEMBLE", "NOT_ENSEMBLE", NULL); } return TCL_ERROR; @@ -1549,7 +1549,7 @@ TclMakeEnsemble( Tcl_DStringInit(&buf); Tcl_DStringInit(&hiddenBuf); TclDStringAppendLiteral(&hiddenBuf, "tcl:"); - Tcl_DStringAppend(&hiddenBuf, name, -1); + Tcl_DStringAppend(&hiddenBuf, name, TCL_INDEX_NONE); TclDStringAppendLiteral(&hiddenBuf, ":"); hiddenLen = Tcl_DStringLength(&hiddenBuf); if (name[0] == ':' && name[1] == ':') { @@ -1558,7 +1558,7 @@ TclMakeEnsemble( */ cmdName = name; - Tcl_DStringAppend(&buf, name, -1); + Tcl_DStringAppend(&buf, name, TCL_INDEX_NONE); ensembleFlags = TCL_ENSEMBLE_PREFIX; } else { /* @@ -1574,7 +1574,7 @@ TclMakeEnsemble( for (i = 0; i < nameCount; ++i) { TclDStringAppendLiteral(&buf, "::"); - Tcl_DStringAppend(&buf, nameParts[i], -1); + Tcl_DStringAppend(&buf, nameParts[i], TCL_INDEX_NONE); } } @@ -1619,10 +1619,10 @@ TclMakeEnsemble( TclDStringAppendLiteral(&buf, "::"); TclNewObj(mapDict); for (i=0 ; map[i].name != NULL ; i++) { - fromObj = Tcl_NewStringObj(map[i].name, -1); + fromObj = Tcl_NewStringObj(map[i].name, TCL_INDEX_NONE); TclNewStringObj(toObj, Tcl_DStringValue(&buf), Tcl_DStringLength(&buf)); - Tcl_AppendToObj(toObj, map[i].name, -1); + Tcl_AppendToObj(toObj, map[i].name, TCL_INDEX_NONE); Tcl_DictObjPut(NULL, mapDict, fromObj, toObj); if (map[i].proc || map[i].nreProc) { @@ -1640,7 +1640,7 @@ TclMakeEnsemble( map[i].nreProc, map[i].clientData, NULL); Tcl_DStringSetLength(&hiddenBuf, hiddenLen); if (Tcl_HideCommand(interp, "___tmp", - Tcl_DStringAppend(&hiddenBuf, map[i].name, -1))) { + Tcl_DStringAppend(&hiddenBuf, map[i].name, TCL_INDEX_NONE))) { Tcl_Panic("%s", Tcl_GetStringResult(interp)); } } else { @@ -1737,7 +1737,7 @@ NsEnsembleImplementationCmdNR( Tcl_DStringInit(&buf); if (ensemblePtr->parameterList) { Tcl_DStringAppend(&buf, - TclGetString(ensemblePtr->parameterList), -1); + TclGetString(ensemblePtr->parameterList), TCL_INDEX_NONE); TclDStringAppendLiteral(&buf, " "); } TclDStringAppendLiteral(&buf, "subcommand ?arg ...?"); @@ -1754,7 +1754,7 @@ NsEnsembleImplementationCmdNR( if (!Tcl_InterpDeleted(interp)) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "ensemble activated for deleted namespace", -1)); + "ensemble activated for deleted namespace", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "ENSEMBLE", "DEAD", NULL); } return TCL_ERROR; @@ -1869,7 +1869,7 @@ NsEnsembleImplementationCmdNR( * Record the spelling correction for usage message. */ - fix = Tcl_NewStringObj(fullName, -1); + fix = Tcl_NewStringObj(fullName, TCL_INDEX_NONE); /* * Cache for later in the subcommand object. @@ -1980,12 +1980,12 @@ NsEnsembleImplementationCmdNR( (ensemblePtr->flags & TCL_ENSEMBLE_PREFIX ? " or ambiguous" : ""), TclGetString(subObj)); if (ensemblePtr->subcommandTable.numEntries == 1) { - Tcl_AppendToObj(errorObj, ensemblePtr->subcommandArrayPtr[0], -1); + Tcl_AppendToObj(errorObj, ensemblePtr->subcommandArrayPtr[0], TCL_INDEX_NONE); } else { size_t i; for (i=0 ; isubcommandTable.numEntries-1 ; i++) { - Tcl_AppendToObj(errorObj, ensemblePtr->subcommandArrayPtr[i], -1); + Tcl_AppendToObj(errorObj, ensemblePtr->subcommandArrayPtr[i], TCL_INDEX_NONE); Tcl_AppendToObj(errorObj, ", ", 2); } Tcl_AppendPrintfToObj(errorObj, "or %s", @@ -2326,7 +2326,7 @@ EnsembleUnknownCallback( if ((result == TCL_OK) && (ensemblePtr->flags & ENSEMBLE_DEAD)) { if (!Tcl_InterpDeleted(interp)) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "unknown subcommand handler deleted its ensemble", -1)); + "unknown subcommand handler deleted its ensemble", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "ENSEMBLE", "UNKNOWN_DELETED", NULL); } @@ -2374,16 +2374,16 @@ EnsembleUnknownCallback( if (result != TCL_ERROR) { Tcl_ResetResult(interp); Tcl_SetObjResult(interp, Tcl_NewStringObj( - "unknown subcommand handler returned bad code: ", -1)); + "unknown subcommand handler returned bad code: ", TCL_INDEX_NONE)); switch (result) { case TCL_RETURN: - Tcl_AppendToObj(Tcl_GetObjResult(interp), "return", -1); + Tcl_AppendToObj(Tcl_GetObjResult(interp), "return", TCL_INDEX_NONE); break; case TCL_BREAK: - Tcl_AppendToObj(Tcl_GetObjResult(interp), "break", -1); + Tcl_AppendToObj(Tcl_GetObjResult(interp), "break", TCL_INDEX_NONE); break; case TCL_CONTINUE: - Tcl_AppendToObj(Tcl_GetObjResult(interp), "continue", -1); + Tcl_AppendToObj(Tcl_GetObjResult(interp), "continue", TCL_INDEX_NONE); break; default: Tcl_AppendPrintfToObj(Tcl_GetObjResult(interp), "%d", result); @@ -2625,7 +2625,7 @@ BuildEnsembleConfig( name = TclGetString(subv[i+1]); hPtr = Tcl_CreateHashEntry(hash, name, &isNew); if (isNew) { - cmdObj = Tcl_NewStringObj(name, -1); + cmdObj = Tcl_NewStringObj(name, TCL_INDEX_NONE); cmdPrefixObj = Tcl_NewListObj(1, &cmdObj); Tcl_SetHashValue(hPtr, cmdPrefixObj); Tcl_IncrRefCount(cmdPrefixObj); @@ -2663,7 +2663,7 @@ BuildEnsembleConfig( * programmer (or [::unknown] of course) to provide the procedure. */ - cmdObj = Tcl_NewStringObj(name, -1); + cmdObj = Tcl_NewStringObj(name, TCL_INDEX_NONE); cmdPrefixObj = Tcl_NewListObj(1, &cmdObj); Tcl_SetHashValue(hPtr, cmdPrefixObj); Tcl_IncrRefCount(cmdPrefixObj); diff --git a/generic/tclEvent.c b/generic/tclEvent.c index 4a61d60..64935e6 100644 --- a/generic/tclEvent.c +++ b/generic/tclEvent.c @@ -281,7 +281,7 @@ HandleBgErrors( Tcl_DecrRefCount(keyPtr); Tcl_WriteChars(errChannel, - "error in background error handler:\n", -1); + "error in background error handler:\n", TCL_INDEX_NONE); if (valuePtr) { Tcl_WriteObj(errChannel, valuePtr); } else { @@ -343,7 +343,7 @@ TclDefaultBgErrorHandlerObjCmd( Tcl_DecrRefCount(keyPtr); if (result != TCL_OK || valuePtr == NULL) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "missing return option \"-level\"", -1)); + "missing return option \"-level\"", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "ARGUMENT", "MISSING", NULL); return TCL_ERROR; } @@ -356,7 +356,7 @@ TclDefaultBgErrorHandlerObjCmd( Tcl_DecrRefCount(keyPtr); if (result != TCL_OK || valuePtr == NULL) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "missing return option \"-code\"", -1)); + "missing return option \"-code\"", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "ARGUMENT", "MISSING", NULL); return TCL_ERROR; } @@ -474,17 +474,17 @@ TclDefaultBgErrorHandlerObjCmd( Tcl_RestoreInterpState(interp, saved); Tcl_WriteObj(errChannel, Tcl_GetVar2Ex(interp, "errorInfo", NULL, TCL_GLOBAL_ONLY)); - Tcl_WriteChars(errChannel, "\n", -1); + Tcl_WriteChars(errChannel, "\n", TCL_INDEX_NONE); } else { Tcl_DiscardInterpState(saved); Tcl_WriteChars(errChannel, - "bgerror failed to handle background error.\n",-1); - Tcl_WriteChars(errChannel, " Original error: ", -1); + "bgerror failed to handle background error.\n", TCL_INDEX_NONE); + Tcl_WriteChars(errChannel, " Original error: ", TCL_INDEX_NONE); Tcl_WriteObj(errChannel, tempObjv[1]); - Tcl_WriteChars(errChannel, "\n", -1); - Tcl_WriteChars(errChannel, " Error in bgerror: ", -1); + Tcl_WriteChars(errChannel, "\n", TCL_INDEX_NONE); + Tcl_WriteChars(errChannel, " Error in bgerror: ", TCL_INDEX_NONE); Tcl_WriteObj(errChannel, resultPtr); - Tcl_WriteChars(errChannel, "\n", -1); + Tcl_WriteChars(errChannel, "\n", TCL_INDEX_NONE); } Tcl_DecrRefCount(resultPtr); Tcl_Flush(errChannel); @@ -1572,7 +1572,7 @@ Tcl_VwaitObjCmd( if (timeout < 0) { Tcl_ResetResult(interp); Tcl_SetObjResult(interp, Tcl_NewStringObj( - "timeout must be positive", -1)); + "timeout must be positive", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "EVENT", "NEGTIME", NULL); result = TCL_ERROR; goto done; @@ -1652,7 +1652,7 @@ Tcl_VwaitObjCmd( if ((mask & (TCL_FILE_EVENTS | TCL_IDLE_EVENTS | TCL_TIMER_EVENTS | TCL_WINDOW_EVENTS)) == 0) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "can't wait: would block forever", -1)); + "can't wait: would block forever", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "EVENT", "NO_SOURCES", NULL); result = TCL_ERROR; goto done; @@ -1660,7 +1660,7 @@ Tcl_VwaitObjCmd( if ((timeout > 0) && ((mask & TCL_TIMER_EVENTS) == 0)) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "timer events disabled with timeout specified", -1)); + "timer events disabled with timeout specified", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "EVENT", "NO_TIME", NULL); result = TCL_ERROR; goto done; @@ -1688,7 +1688,7 @@ Tcl_VwaitObjCmd( for (i = 0; i < numItems; i++) { if (vwaitItems[i].mask) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "file events disabled with channel(s) specified", -1)); + "file events disabled with channel(s) specified", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "EVENT", "NO_FILE_EVENT", NULL); result = TCL_ERROR; goto done; @@ -1727,7 +1727,7 @@ Tcl_VwaitObjCmd( } if (Tcl_LimitExceeded(interp)) { Tcl_ResetResult(interp); - Tcl_SetObjResult(interp, Tcl_NewStringObj("limit exceeded", -1)); + Tcl_SetObjResult(interp, Tcl_NewStringObj("limit exceeded", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "EVENT", "LIMIT", NULL); break; } @@ -1975,7 +1975,7 @@ Tcl_UpdateObjCmd( } if (Tcl_LimitExceeded(interp)) { Tcl_ResetResult(interp); - Tcl_SetObjResult(interp, Tcl_NewStringObj("limit exceeded", -1)); + Tcl_SetObjResult(interp, Tcl_NewStringObj("limit exceeded", TCL_INDEX_NONE)); return TCL_ERROR; } } diff --git a/generic/tclExecute.c b/generic/tclExecute.c index 81ce1a7..97122b9 100644 --- a/generic/tclExecute.c +++ b/generic/tclExecute.c @@ -2377,7 +2377,7 @@ TEBCresume( if (!corPtr) { TRACE_APPEND(("ERROR: yield outside coroutine\n")); Tcl_SetObjResult(interp, Tcl_NewStringObj( - "yield can only be called in a coroutine", -1)); + "yield can only be called in a coroutine", TCL_INDEX_NONE)); DECACHE_STACK_INFO(); Tcl_SetErrorCode(interp, "TCL", "COROUTINE", "ILLEGAL_YIELD", NULL); @@ -2408,7 +2408,7 @@ TEBCresume( TRACE(("[%.30s] => ERROR: yield outside coroutine\n", O2S(valuePtr))); Tcl_SetObjResult(interp, Tcl_NewStringObj( - "yieldto can only be called in a coroutine", -1)); + "yieldto can only be called in a coroutine", TCL_INDEX_NONE)); DECACHE_STACK_INFO(); Tcl_SetErrorCode(interp, "TCL", "COROUTINE", "ILLEGAL_YIELD", NULL); @@ -2419,7 +2419,7 @@ TEBCresume( TRACE(("[%.30s] => ERROR: yield in deleted\n", O2S(valuePtr))); Tcl_SetObjResult(interp, Tcl_NewStringObj( - "yieldto called in deleted namespace", -1)); + "yieldto called in deleted namespace", TCL_INDEX_NONE)); DECACHE_STACK_INFO(); Tcl_SetErrorCode(interp, "TCL", "COROUTINE", "YIELDTO_IN_DELETED", NULL); @@ -2482,7 +2482,7 @@ TEBCresume( if (!(iPtr->varFramePtr->isProcCallFrame & 1)) { TRACE(("%d => ERROR: tailcall in non-proc context\n", opnd)); Tcl_SetObjResult(interp, Tcl_NewStringObj( - "tailcall can only be called from a proc or lambda", -1)); + "tailcall can only be called from a proc or lambda", TCL_INDEX_NONE)); DECACHE_STACK_INFO(); Tcl_SetErrorCode(interp, "TCL", "TAILCALL", "ILLEGAL", NULL); CACHE_STACK_INFO(); @@ -2511,7 +2511,7 @@ TEBCresume( */ listPtr = Tcl_NewListObj(opnd, &OBJ_AT_DEPTH(opnd-1)); - nsObjPtr = Tcl_NewStringObj(iPtr->varFramePtr->nsPtr->fullName, -1); + nsObjPtr = Tcl_NewStringObj(iPtr->varFramePtr->nsPtr->fullName, TCL_INDEX_NONE); TclListObjSetElement(interp, listPtr, 0, nsObjPtr); if (iPtr->varFramePtr->tailcallPtr) { Tcl_DecrRefCount(iPtr->varFramePtr->tailcallPtr); @@ -5150,7 +5150,7 @@ TEBCresume( { int checkEq = ((*pc == INST_EQ) || (*pc == INST_NEQ) || (*pc == INST_STR_EQ) || (*pc == INST_STR_NEQ)); - match = TclStringCmp(valuePtr, value2Ptr, checkEq, 0, -1); + match = TclStringCmp(valuePtr, value2Ptr, checkEq, 0, TCL_INDEX_NONE); } /* @@ -5844,7 +5844,7 @@ TEBCresume( case INST_RSHIFT: if (w2 < 0) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "negative shift argument", -1)); + "negative shift argument", TCL_INDEX_NONE)); #ifdef ERROR_CODE_FOR_EARLY_DETECTED_ARITH_ERROR DECACHE_STACK_INFO(); Tcl_SetErrorCode(interp, "ARITH", "DOMAIN", @@ -5893,7 +5893,7 @@ TEBCresume( case INST_LSHIFT: if (w2 < 0) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "negative shift argument", -1)); + "negative shift argument", TCL_INDEX_NONE)); #ifdef ERROR_CODE_FOR_EARLY_DETECTED_ARITH_ERROR DECACHE_STACK_INFO(); Tcl_SetErrorCode(interp, "ARITH", "DOMAIN", @@ -5916,7 +5916,7 @@ TEBCresume( */ Tcl_SetObjResult(interp, Tcl_NewStringObj( - "integer value too large to represent", -1)); + "integer value too large to represent", TCL_INDEX_NONE)); #ifdef ERROR_CODE_FOR_EARLY_DETECTED_ARITH_ERROR DECACHE_STACK_INFO(); Tcl_SetErrorCode(interp, "ARITH", "IOVERFLOW", @@ -7422,14 +7422,14 @@ TEBCresume( */ divideByZero: - Tcl_SetObjResult(interp, Tcl_NewStringObj("divide by zero", -1)); + Tcl_SetObjResult(interp, Tcl_NewStringObj("divide by zero", TCL_INDEX_NONE)); DECACHE_STACK_INFO(); Tcl_SetErrorCode(interp, "ARITH", "DIVZERO", "divide by zero", NULL); CACHE_STACK_INFO(); goto gotError; outOfMemory: - Tcl_SetObjResult(interp, Tcl_NewStringObj("out of memory", -1)); + Tcl_SetObjResult(interp, Tcl_NewStringObj("out of memory", TCL_INDEX_NONE)); DECACHE_STACK_INFO(); Tcl_SetErrorCode(interp, "ARITH", "OUTOFMEMORY", "out of memory", NULL); CACHE_STACK_INFO(); @@ -7442,7 +7442,7 @@ TEBCresume( exponOfZero: Tcl_SetObjResult(interp, Tcl_NewStringObj( - "exponentiation of zero by negative power", -1)); + "exponentiation of zero by negative power", TCL_INDEX_NONE)); DECACHE_STACK_INFO(); Tcl_SetErrorCode(interp, "ARITH", "DOMAIN", "exponentiation of zero by negative power", NULL); @@ -8003,7 +8003,7 @@ ExecuteExtendedBinaryMathOp( } if (invalid) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "negative shift argument", -1)); + "negative shift argument", TCL_INDEX_NONE)); return GENERAL_ARITHMETIC_ERROR; } @@ -8034,7 +8034,7 @@ ExecuteExtendedBinaryMathOp( */ Tcl_SetObjResult(interp, Tcl_NewStringObj( - "integer value too large to represent", -1)); + "integer value too large to represent", TCL_INDEX_NONE)); return GENERAL_ARITHMETIC_ERROR; } shift = (int)(*((const Tcl_WideInt *)ptr2)); @@ -8282,7 +8282,7 @@ ExecuteExtendedBinaryMathOp( if (type2 != TCL_NUMBER_INT) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "exponent too large", -1)); + "exponent too large", TCL_INDEX_NONE)); return GENERAL_ARITHMETIC_ERROR; } @@ -8362,7 +8362,7 @@ ExecuteExtendedBinaryMathOp( || (value2Ptr->typePtr != &tclIntType.objType) || (Tcl_WideUInt)w2 >= (1<<28)) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "exponent too large", -1)); + "exponent too large", TCL_INDEX_NONE)); return GENERAL_ARITHMETIC_ERROR; } Tcl_TakeBignumFromObj(NULL, valuePtr, &big1); @@ -9369,16 +9369,16 @@ TclExprFloatError( if ((errno == EDOM) || isnan(value)) { s = "domain error: argument not in valid range"; - Tcl_SetObjResult(interp, Tcl_NewStringObj(s, -1)); + Tcl_SetObjResult(interp, Tcl_NewStringObj(s, TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "ARITH", "DOMAIN", s, NULL); } else if ((errno == ERANGE) || isinf(value)) { if (value == 0.0) { s = "floating-point value too small to represent"; - Tcl_SetObjResult(interp, Tcl_NewStringObj(s, -1)); + Tcl_SetObjResult(interp, Tcl_NewStringObj(s, TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "ARITH", "UNDERFLOW", s, NULL); } else { s = "floating-point value too large to represent"; - Tcl_SetObjResult(interp, Tcl_NewStringObj(s, -1)); + Tcl_SetObjResult(interp, Tcl_NewStringObj(s, TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "ARITH", "OVERFLOW", s, NULL); } } else { diff --git a/generic/tclFileName.c b/generic/tclFileName.c index 168355a..2581d37 100644 --- a/generic/tclFileName.c +++ b/generic/tclFileName.c @@ -537,7 +537,7 @@ Tcl_SplitPath( * Perform the splitting, using objectified, vfs-aware code. */ - tmpPtr = Tcl_NewStringObj(path, -1); + tmpPtr = Tcl_NewStringObj(path, TCL_INDEX_NONE); Tcl_IncrRefCount(tmpPtr); resultPtr = Tcl_FSSplitPath(tmpPtr, argcPtr); Tcl_IncrRefCount(resultPtr); @@ -943,7 +943,7 @@ Tcl_JoinPath( TclNewObj(listObj); for (i = 0; i < argc; i++) { Tcl_ListObjAppendElement(NULL, listObj, - Tcl_NewStringObj(argv[i], -1)); + Tcl_NewStringObj(argv[i], TCL_INDEX_NONE)); } /* @@ -1003,7 +1003,7 @@ Tcl_TranslateFileName( Tcl_DString *bufferPtr) /* Uninitialized or free DString filled with * name. */ { - Tcl_Obj *path = Tcl_NewStringObj(name, -1); + Tcl_Obj *path = Tcl_NewStringObj(name, TCL_INDEX_NONE); Tcl_Obj *transPtr; Tcl_IncrRefCount(path); @@ -1171,7 +1171,7 @@ Tcl_GlobObjCmd( case GLOB_DIR: /* -dir */ if (i == (objc-1)) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "missing argument to \"-directory\"", -1)); + "missing argument to \"-directory\"", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "ARGUMENT", "MISSING", NULL); return TCL_ERROR; } @@ -1199,7 +1199,7 @@ Tcl_GlobObjCmd( case GLOB_PATH: /* -path */ if (i == (objc-1)) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "missing argument to \"-path\"", -1)); + "missing argument to \"-path\"", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "ARGUMENT", "MISSING", NULL); return TCL_ERROR; } @@ -1220,7 +1220,7 @@ Tcl_GlobObjCmd( case GLOB_TYPE: /* -types */ if (i == (objc-1)) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "missing argument to \"-types\"", -1)); + "missing argument to \"-types\"", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "ARGUMENT", "MISSING", NULL); return TCL_ERROR; } @@ -1240,7 +1240,7 @@ Tcl_GlobObjCmd( if ((globFlags & TCL_GLOBMODE_TAILS) && (pathOrDir == NULL)) { Tcl_SetObjResult(interp, Tcl_NewStringObj( "\"-tails\" must be used with either " - "\"-directory\" or \"-path\"", -1)); + "\"-directory\" or \"-path\"", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "OPERATION", "GLOB", "BADOPTIONCOMBINATION", NULL); return TCL_ERROR; @@ -1291,7 +1291,7 @@ Tcl_GlobObjCmd( * in TclGlob requires a non-NULL pathOrDir. */ - Tcl_DStringAppend(&pref, first, -1); + Tcl_DStringAppend(&pref, first, TCL_INDEX_NONE); globFlags &= ~TCL_GLOBMODE_TAILS; pathOrDir = NULL; } else { @@ -1330,7 +1330,7 @@ Tcl_GlobObjCmd( } } if (*search != '\0') { - Tcl_DStringAppend(&prefix, search, -1); + Tcl_DStringAppend(&prefix, search, TCL_INDEX_NONE); } Tcl_DStringFree(&pref); } @@ -1460,7 +1460,7 @@ Tcl_GlobObjCmd( badMacTypesArg: Tcl_SetObjResult(interp, Tcl_NewStringObj( "only one MacOS type or creator argument" - " to \"-types\" allowed", -1)); + " to \"-types\" allowed", TCL_INDEX_NONE)); result = TCL_ERROR; Tcl_SetErrorCode(interp, "TCL", "ARGUMENT", "BAD", NULL); join = 0; @@ -1642,7 +1642,7 @@ TclGlob( || (tail[0] == '\\' && tail[1] == '\\'))) { size_t driveNameLen; Tcl_Obj *driveName; - Tcl_Obj *temp = Tcl_NewStringObj(tail, -1); + Tcl_Obj *temp = Tcl_NewStringObj(tail, TCL_INDEX_NONE); Tcl_IncrRefCount(temp); switch (TclGetPathType(temp, NULL, &driveNameLen, &driveName)) { @@ -2033,14 +2033,14 @@ DoGlob( break; } Tcl_SetObjResult(interp, Tcl_NewStringObj( - "unmatched open-brace in file name", -1)); + "unmatched open-brace in file name", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "OPERATION", "GLOB", "BALANCE", NULL); return TCL_ERROR; } else if (*p == '}') { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "unmatched close-brace in file name", -1)); + "unmatched close-brace in file name", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "OPERATION", "GLOB", "BALANCE", NULL); return TCL_ERROR; @@ -2072,7 +2072,7 @@ DoGlob( SkipToChar(&p, ','); Tcl_DStringSetLength(&newName, baseLength); Tcl_DStringAppend(&newName, element, p-element); - Tcl_DStringAppend(&newName, closeBrace+1, -1); + Tcl_DStringAppend(&newName, closeBrace+1, TCL_INDEX_NONE); result = DoGlob(interp, matchesObj, separators, pathPtr, flags, Tcl_DStringValue(&newName), types); if (result != TCL_OK) { diff --git a/generic/tclIOGT.c b/generic/tclIOGT.c index 868791a..532adbd 100644 --- a/generic/tclIOGT.c +++ b/generic/tclIOGT.c @@ -19,25 +19,25 @@ * the transformation. */ -static int TransformBlockModeProc(ClientData instanceData, +static int TransformBlockModeProc(void *instanceData, int mode); -static int TransformCloseProc(ClientData instanceData, +static int TransformCloseProc(void *instanceData, Tcl_Interp *interp, int flags); -static int TransformInputProc(ClientData instanceData, char *buf, +static int TransformInputProc(void *instanceData, char *buf, int toRead, int *errorCodePtr); -static int TransformOutputProc(ClientData instanceData, +static int TransformOutputProc(void *instanceData, const char *buf, int toWrite, int *errorCodePtr); -static int TransformSetOptionProc(ClientData instanceData, +static int TransformSetOptionProc(void *instanceData, Tcl_Interp *interp, const char *optionName, const char *value); -static int TransformGetOptionProc(ClientData instanceData, +static int TransformGetOptionProc(void *instanceData, Tcl_Interp *interp, const char *optionName, Tcl_DString *dsPtr); -static void TransformWatchProc(ClientData instanceData, int mask); -static int TransformGetFileHandleProc(ClientData instanceData, - int direction, ClientData *handlePtr); -static int TransformNotifyProc(ClientData instanceData, int mask); -static long long TransformWideSeekProc(ClientData instanceData, +static void TransformWatchProc(void *instanceData, int mask); +static int TransformGetFileHandleProc(void *instanceData, + int direction, void **handlePtr); +static int TransformNotifyProc(void *instanceData, int mask); +static long long TransformWideSeekProc(void *instanceData, long long offset, int mode, int *errorCodePtr); /* @@ -45,7 +45,7 @@ static long long TransformWideSeekProc(ClientData instanceData, * handling and generating fileeevents. */ -static void TransformChannelHandlerTimer(ClientData clientData); +static void TransformChannelHandlerTimer(void *clientData); /* * Forward declarations of internal procedures. Third, helper procedures @@ -268,7 +268,7 @@ TclChannelTransform( if (TCL_OK != TclListObjLengthM(interp, cmdObjPtr, &objc)) { Tcl_SetObjResult(interp, - Tcl_NewStringObj("-command value is not a list", -1)); + Tcl_NewStringObj("-command value is not a list", TCL_INDEX_NONE)); return TCL_ERROR; } @@ -397,7 +397,7 @@ ExecuteCallback( } Tcl_IncrRefCount(command); - Tcl_ListObjAppendElement(NULL, command, Tcl_NewStringObj((char *) op, -1)); + Tcl_ListObjAppendElement(NULL, command, Tcl_NewStringObj((char *) op, TCL_INDEX_NONE)); /* * Use a byte-array to prevent the misinterpretation of binary data coming @@ -510,7 +510,7 @@ ExecuteCallback( static int TransformBlockModeProc( - ClientData instanceData, /* State of transformation. */ + void *instanceData, /* State of transformation. */ int mode) /* New blocking mode. */ { TransformChannelData *dataPtr = (TransformChannelData *)instanceData; @@ -542,7 +542,7 @@ TransformBlockModeProc( static int TransformCloseProc( - ClientData instanceData, + void *instanceData, Tcl_Interp *interp, int flags) { @@ -626,7 +626,7 @@ TransformCloseProc( static int TransformInputProc( - ClientData instanceData, + void *instanceData, char *buf, int toRead, int *errorCodePtr) @@ -793,7 +793,7 @@ TransformInputProc( static int TransformOutputProc( - ClientData instanceData, + void *instanceData, const char *buf, int toWrite, int *errorCodePtr) @@ -845,7 +845,7 @@ TransformOutputProc( static long long TransformWideSeekProc( - ClientData instanceData, /* The channel to manipulate. */ + void *instanceData, /* The channel to manipulate. */ long long offset, /* Size of movement. */ int mode, /* How to move. */ int *errorCodePtr) /* Location of error flag. */ @@ -923,7 +923,7 @@ TransformWideSeekProc( static int TransformSetOptionProc( - ClientData instanceData, + void *instanceData, Tcl_Interp *interp, const char *optionName, const char *value) @@ -961,7 +961,7 @@ TransformSetOptionProc( static int TransformGetOptionProc( - ClientData instanceData, + void *instanceData, Tcl_Interp *interp, const char *optionName, Tcl_DString *dsPtr) @@ -1008,7 +1008,7 @@ TransformGetOptionProc( static void TransformWatchProc( - ClientData instanceData, /* Channel to watch. */ + void *instanceData, /* Channel to watch. */ int mask) /* Events of interest. */ { TransformChannelData *dataPtr = (TransformChannelData *)instanceData; @@ -1086,9 +1086,9 @@ TransformWatchProc( static int TransformGetFileHandleProc( - ClientData instanceData, /* Channel to query. */ + void *instanceData, /* Channel to query. */ int direction, /* Direction of interest. */ - ClientData *handlePtr) /* Place to store the handle into. */ + void **handlePtr) /* Place to store the handle into. */ { TransformChannelData *dataPtr = (TransformChannelData *)instanceData; @@ -1120,7 +1120,7 @@ TransformGetFileHandleProc( static int TransformNotifyProc( - ClientData clientData, /* The state of the notified + void *clientData, /* The state of the notified * transformation. */ int mask) /* The mask of occuring events. */ { @@ -1165,7 +1165,7 @@ TransformNotifyProc( static void TransformChannelHandlerTimer( - ClientData clientData) /* Transformation to query. */ + void *clientData) /* Transformation to query. */ { TransformChannelData *dataPtr = (TransformChannelData *)clientData; diff --git a/generic/tclIOSock.c b/generic/tclIOSock.c index f14c5c1..a925c3d 100644 --- a/generic/tclIOSock.c +++ b/generic/tclIOSock.c @@ -317,7 +317,7 @@ Tcl_OpenTcpServer( int port, const char *host, Tcl_TcpAcceptProc *acceptProc, - ClientData callbackData) + void *callbackData) { char portbuf[TCL_INTEGER_SPACE]; diff --git a/generic/tclIOUtil.c b/generic/tclIOUtil.c index 470977e..436d364 100644 --- a/generic/tclIOUtil.c +++ b/generic/tclIOUtil.c @@ -1756,7 +1756,7 @@ Tcl_FSEvalFileEx( * Otherwise, replace them. [Bug 3466099] */ - if (Tcl_ReadChars(chan, objPtr, -1, + if (Tcl_ReadChars(chan, objPtr, TCL_INDEX_NONE, memcmp(string, "\xEF\xBB\xBF", 3)) == TCL_IO_FAILURE) { Tcl_CloseEx(interp, chan, 0); Tcl_SetObjResult(interp, Tcl_ObjPrintf( @@ -1893,7 +1893,7 @@ TclNREvalFile( * Otherwise, replace them. [Bug 3466099] */ - if (Tcl_ReadChars(chan, objPtr, -1, + if (Tcl_ReadChars(chan, objPtr, TCL_INDEX_NONE, memcmp(string, "\xEF\xBB\xBF", 3)) == TCL_IO_FAILURE) { Tcl_CloseEx(interp, chan, 0); Tcl_SetObjResult(interp, Tcl_ObjPrintf( @@ -2459,7 +2459,7 @@ TclFSFileAttrIndex( * It's a constant attribute table, so use T_GIFO. */ - Tcl_Obj *tmpObj = Tcl_NewStringObj(attributeName, -1); + Tcl_Obj *tmpObj = Tcl_NewStringObj(attributeName, TCL_INDEX_NONE); int result; result = Tcl_GetIndexFromObj(NULL, tmpObj, attrTable, NULL, TCL_EXACT, @@ -3292,7 +3292,7 @@ Tcl_LoadFile( Tcl_DecrRefCount(copyToPtr); if (interp) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "couldn't load from current filesystem", -1)); + "couldn't load from current filesystem", TCL_INDEX_NONE)); } return TCL_ERROR; } @@ -4612,7 +4612,7 @@ Tcl_FSFileSystemInfo( resPtr = Tcl_NewListObj(0, NULL); Tcl_ListObjAppendElement(NULL, resPtr, - Tcl_NewStringObj(fsPtr->typeName, -1)); + Tcl_NewStringObj(fsPtr->typeName, TCL_INDEX_NONE)); if (fsPtr->filesystemPathTypeProc != NULL) { Tcl_Obj *typePtr = fsPtr->filesystemPathTypeProc(pathPtr); diff --git a/generic/tclIndexObj.c b/generic/tclIndexObj.c index 58bcc04..66d7f30 100644 --- a/generic/tclIndexObj.c +++ b/generic/tclIndexObj.c @@ -533,7 +533,7 @@ PrefixMatchObjCmd( case PRFMATCH_MESSAGE: if (i > objc-4) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "missing value for -message", -1)); + "missing value for -message", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "OPERATION", "NOARG", NULL); return TCL_ERROR; } @@ -543,7 +543,7 @@ PrefixMatchObjCmd( case PRFMATCH_ERROR: if (i > objc-4) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "missing value for -error", -1)); + "missing value for -error", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "OPERATION", "NOARG", NULL); return TCL_ERROR; } @@ -819,9 +819,9 @@ Tcl_WrongNumArgs( if (iPtr->flags & INTERP_ALTERNATE_WRONG_ARGS) { iPtr->flags &= ~INTERP_ALTERNATE_WRONG_ARGS; Tcl_AppendObjToObj(objPtr, Tcl_GetObjResult(interp)); - Tcl_AppendToObj(objPtr, " or \"", -1); + Tcl_AppendToObj(objPtr, " or \"", TCL_INDEX_NONE); } else { - Tcl_AppendToObj(objPtr, "wrong # args: should be \"", -1); + Tcl_AppendToObj(objPtr, "wrong # args: should be \"", TCL_INDEX_NONE); } /* @@ -1289,7 +1289,7 @@ PrintUsage( * Now add the option information, with pretty-printing. */ - msg = Tcl_NewStringObj("Command-specific options:", -1); + msg = Tcl_NewStringObj("Command-specific options:", TCL_INDEX_NONE); for (infoPtr = argTable; infoPtr->type != TCL_ARGV_END; infoPtr++) { if ((infoPtr->type == TCL_ARGV_HELP) && (infoPtr->keyStr == NULL)) { Tcl_AppendPrintfToObj(msg, "\n%s", infoPtr->helpStr); @@ -1305,7 +1305,7 @@ PrintUsage( } numSpaces -= NUM_SPACES; } - Tcl_AppendToObj(msg, infoPtr->helpStr, -1); + Tcl_AppendToObj(msg, infoPtr->helpStr, TCL_INDEX_NONE); switch (infoPtr->type) { case TCL_ARGV_INT: Tcl_AppendPrintfToObj(msg, "\n\t\tDefault value: %d", diff --git a/generic/tclInterp.c b/generic/tclInterp.c index 416f74e..ecc6e15 100644 --- a/generic/tclInterp.c +++ b/generic/tclInterp.c @@ -186,7 +186,7 @@ struct LimitHandler { int flags; /* The state of this particular handler. */ Tcl_LimitHandlerProc *handlerProc; /* The handler callback. */ - ClientData clientData; /* Opaque argument to the handler callback. */ + void *clientData; /* Opaque argument to the handler callback. */ Tcl_LimitHandlerDeleteProc *deleteProc; /* How to delete the clientData. */ LimitHandler *prevPtr; /* Previous item in linked list of @@ -265,12 +265,12 @@ static void InheritLimitsFromParent(Tcl_Interp *childInterp, Tcl_Interp *parentInterp); static void SetScriptLimitCallback(Tcl_Interp *interp, int type, Tcl_Interp *targetInterp, Tcl_Obj *scriptObj); -static void CallScriptLimitCallback(ClientData clientData, +static void CallScriptLimitCallback(void *clientData, Tcl_Interp *interp); -static void DeleteScriptLimitCallback(ClientData clientData); +static void DeleteScriptLimitCallback(void *clientData); static void RunLimitHandlers(LimitHandler *handlerPtr, Tcl_Interp *interp); -static void TimeLimitCallback(ClientData clientData); +static void TimeLimitCallback(void *clientData); /* NRE enabling */ static Tcl_NRPostProc NRPostInvokeHidden; @@ -339,7 +339,7 @@ Tcl_Init( pkgName.nextPtr = *names; *names = &pkgName; if (tclPreInitScript != NULL) { - if (Tcl_EvalEx(interp, tclPreInitScript, -1, 0) == TCL_ERROR) { + if (Tcl_EvalEx(interp, tclPreInitScript, TCL_INDEX_NONE, 0) == TCL_ERROR) { goto end; } } @@ -449,7 +449,7 @@ Tcl_Init( " error $msg\n" " }\n" "}\n" -"tclInit", -1, 0); +"tclInit", TCL_INDEX_NONE, 0); end: *names = (*names)->nextPtr; @@ -601,7 +601,7 @@ InterpInfoDeleteProc( int Tcl_InterpObjCmd( - ClientData clientData, + void *clientData, Tcl_Interp *interp, /* Current interpreter. */ int objc, /* Number of arguments. */ Tcl_Obj *const objv[]) /* Argument objects. */ @@ -837,7 +837,7 @@ NRInterpCmd( break; } } - childPtr = Tcl_NewStringObj(buf, -1); + childPtr = Tcl_NewStringObj(buf, TCL_INDEX_NONE); } if (ChildCreate(interp, childPtr, safe) == NULL) { if (buf[0] != '\0') { @@ -872,7 +872,7 @@ NRInterpCmd( return TCL_ERROR; } else if (childInterp == interp) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "cannot delete the current interpreter", -1)); + "cannot delete the current interpreter", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "OPERATION", "INTERP", "DELETESELF", NULL); return TCL_ERROR; @@ -1053,7 +1053,7 @@ NRInterpCmd( for ( ; hPtr != NULL; hPtr = Tcl_NextHashEntry(&hashSearch)) { string = (char *)Tcl_GetHashKey(&iiPtr->parent.childTable, hPtr); Tcl_ListObjAppendElement(NULL, resultPtr, - Tcl_NewStringObj(string, -1)); + Tcl_NewStringObj(string, TCL_INDEX_NONE)); } Tcl_SetObjResult(interp, resultPtr); return TCL_OK; @@ -1207,14 +1207,14 @@ Tcl_CreateAlias( objv = (Tcl_Obj **)TclStackAlloc(childInterp, sizeof(Tcl_Obj *) * argc); for (i = 0; i < argc; i++) { - objv[i] = Tcl_NewStringObj(argv[i], -1); + objv[i] = Tcl_NewStringObj(argv[i], TCL_INDEX_NONE); Tcl_IncrRefCount(objv[i]); } - childObjPtr = Tcl_NewStringObj(childCmd, -1); + childObjPtr = Tcl_NewStringObj(childCmd, TCL_INDEX_NONE); Tcl_IncrRefCount(childObjPtr); - targetObjPtr = Tcl_NewStringObj(targetCmd, -1); + targetObjPtr = Tcl_NewStringObj(targetCmd, TCL_INDEX_NONE); Tcl_IncrRefCount(targetObjPtr); result = AliasCreate(childInterp, childInterp, targetInterp, childObjPtr, @@ -1258,10 +1258,10 @@ Tcl_CreateAliasObj( Tcl_Obj *childObjPtr, *targetObjPtr; int result; - childObjPtr = Tcl_NewStringObj(childCmd, -1); + childObjPtr = Tcl_NewStringObj(childCmd, TCL_INDEX_NONE); Tcl_IncrRefCount(childObjPtr); - targetObjPtr = Tcl_NewStringObj(targetCmd, -1); + targetObjPtr = Tcl_NewStringObj(targetCmd, TCL_INDEX_NONE); Tcl_IncrRefCount(targetObjPtr); result = AliasCreate(childInterp, childInterp, targetInterp, childObjPtr, @@ -1820,7 +1820,7 @@ AliasList( static int AliasNRCmd( - ClientData clientData, /* Alias record. */ + void *clientData, /* Alias record. */ Tcl_Interp *interp, /* Current interpreter. */ int objc, /* Number of arguments. */ Tcl_Obj *const objv[]) /* Argument vector. */ @@ -1873,7 +1873,7 @@ AliasNRCmd( int TclAliasObjCmd( - ClientData clientData, /* Alias record. */ + void *clientData, /* Alias record. */ Tcl_Interp *interp, /* Current interpreter. */ int objc, /* Number of arguments. */ Tcl_Obj *const objv[]) /* Argument vector. */ @@ -1964,7 +1964,7 @@ TclAliasObjCmd( int TclLocalAliasObjCmd( - ClientData clientData, /* Alias record. */ + void *clientData, /* Alias record. */ Tcl_Interp *interp, /* Current interpreter. */ int objc, /* Number of arguments. */ Tcl_Obj *const objv[]) /* Argument vector. */ @@ -2049,7 +2049,7 @@ TclLocalAliasObjCmd( static void AliasObjCmdDeleteProc( - ClientData clientData) /* The alias record for this alias. */ + void *clientData) /* The alias record for this alias. */ { Alias *aliasPtr = (Alias *)clientData; Target *targetPtr; @@ -2116,7 +2116,7 @@ Tcl_CreateChild( Tcl_Obj *pathPtr; Tcl_Interp *childInterp; - pathPtr = Tcl_NewStringObj(childPath, -1); + pathPtr = Tcl_NewStringObj(childPath, TCL_INDEX_NONE); childInterp = ChildCreate(interp, pathPtr, isSafe); Tcl_DecrRefCount(pathPtr); @@ -2147,7 +2147,7 @@ Tcl_GetChild( Tcl_Obj *pathPtr; Tcl_Interp *childInterp; - pathPtr = Tcl_NewStringObj(childPath, -1); + pathPtr = Tcl_NewStringObj(childPath, TCL_INDEX_NONE); childInterp = GetInterp(interp, pathPtr); Tcl_DecrRefCount(pathPtr); @@ -2293,7 +2293,7 @@ Tcl_GetInterpPath( } Tcl_ListObjAppendElement(NULL, Tcl_GetObjResult(interp), Tcl_NewStringObj((const char *)Tcl_GetHashKey(&iiPtr->parent.childTable, - iiPtr->child.childEntryPtr), -1)); + iiPtr->child.childEntryPtr), TCL_INDEX_NONE)); return TCL_OK; } @@ -2386,7 +2386,7 @@ ChildBgerror( if (TCL_ERROR == TclListObjLengthM(NULL, objv[0], &length) || (length < 1)) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "cmdPrefix must be list of length >= 1", -1)); + "cmdPrefix must be list of length >= 1", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "OPERATION", "INTERP", "BGERRORFORMAT", NULL); return TCL_ERROR; @@ -2552,7 +2552,7 @@ ChildCreate( int TclChildObjCmd( - ClientData clientData, /* Child interpreter. */ + void *clientData, /* Child interpreter. */ Tcl_Interp *interp, /* Current interpreter. */ int objc, /* Number of arguments. */ Tcl_Obj *const objv[]) /* Argument objects. */ @@ -2562,7 +2562,7 @@ TclChildObjCmd( static int NRChildCmd( - ClientData clientData, /* Child interpreter. */ + void *clientData, /* Child interpreter. */ Tcl_Interp *interp, /* Current interpreter. */ int objc, /* Number of arguments. */ Tcl_Obj *const objv[]) /* Argument objects. */ @@ -2766,7 +2766,7 @@ NRChildCmd( static void ChildObjCmdDeleteProc( - ClientData clientData) /* The ChildRecord for the command. */ + void *clientData) /* The ChildRecord for the command. */ { Child *childPtr; /* Interim storage for Child record. */ Tcl_Interp *childInterp = (Tcl_Interp *)clientData; @@ -2831,7 +2831,7 @@ ChildDebugCmd( if (objc == 0) { TclNewObj(resultPtr); Tcl_ListObjAppendElement(NULL, resultPtr, - Tcl_NewStringObj("-frame", -1)); + Tcl_NewStringObj("-frame", TCL_INDEX_NONE)); Tcl_ListObjAppendElement(NULL, resultPtr, Tcl_NewBooleanObj(iPtr->flags & INTERP_DEBUG_FRAME)); Tcl_SetObjResult(interp, resultPtr); @@ -3001,7 +3001,7 @@ ChildRecursionLimit( if (objc) { if (Tcl_IsSafe(interp)) { Tcl_SetObjResult(interp, Tcl_NewStringObj("permission denied: " - "safe interpreters cannot change recursion limit", -1)); + "safe interpreters cannot change recursion limit", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "OPERATION", "INTERP", "UNSAFE", NULL); return TCL_ERROR; @@ -3020,7 +3020,7 @@ ChildRecursionLimit( iPtr = (Interp *) childInterp; if (interp == childInterp && iPtr->numLevels > (size_t)limit) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "falling back due to new recursion limit", -1)); + "falling back due to new recursion limit", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "RECURSION", NULL); return TCL_ERROR; } @@ -3110,7 +3110,7 @@ ChildHidden( hPtr != NULL; hPtr = Tcl_NextHashEntry(&hSearch)) { Tcl_ListObjAppendElement(NULL, listObjPtr, - Tcl_NewStringObj((const char *)Tcl_GetHashKey(hTblPtr, hPtr), -1)); + Tcl_NewStringObj((const char *)Tcl_GetHashKey(hTblPtr, hPtr), TCL_INDEX_NONE)); } } Tcl_SetObjResult(interp, listObjPtr); @@ -3183,7 +3183,7 @@ ChildInvokeHidden( static int NRPostInvokeHidden( - ClientData data[], + void *data[], Tcl_Interp *interp, int result) { @@ -3299,7 +3299,7 @@ TclMakeSafe( */ (void) Tcl_EvalEx(interp, - "namespace eval ::tcl {namespace eval mathfunc {}}", -1, 0); + "namespace eval ::tcl {namespace eval mathfunc {}}", TCL_INDEX_NONE, 0); } iPtr->flags |= SAFE_INTERP; @@ -3479,7 +3479,7 @@ Tcl_LimitCheck( iPtr->limit.exceeded &= ~TCL_LIMIT_COMMANDS; } else if (iPtr->limit.exceeded & TCL_LIMIT_COMMANDS) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "command count limit exceeded", -1)); + "command count limit exceeded", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "LIMIT", "COMMANDS", NULL); Tcl_Release(interp); return TCL_ERROR; @@ -3505,7 +3505,7 @@ Tcl_LimitCheck( iPtr->limit.exceeded &= ~TCL_LIMIT_TIME; } else if (iPtr->limit.exceeded & TCL_LIMIT_TIME) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "time limit exceeded", -1)); + "time limit exceeded", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "LIMIT", "TIME", NULL); Tcl_Release(interp); return TCL_ERROR; @@ -3608,7 +3608,7 @@ Tcl_LimitAddHandler( Tcl_Interp *interp, int type, Tcl_LimitHandlerProc *handlerProc, - ClientData clientData, + void *clientData, Tcl_LimitHandlerDeleteProc *deleteProc) { Interp *iPtr = (Interp *) interp; @@ -3682,7 +3682,7 @@ Tcl_LimitRemoveHandler( Tcl_Interp *interp, int type, Tcl_LimitHandlerProc *handlerProc, - ClientData clientData) + void *clientData) { Interp *iPtr = (Interp *) interp; LimitHandler *handlerPtr; @@ -4081,7 +4081,7 @@ Tcl_LimitSetTime( static void TimeLimitCallback( - ClientData clientData) + void *clientData) { Tcl_Interp *interp = (Tcl_Interp *)clientData; Interp *iPtr = (Interp *)clientData; @@ -4225,7 +4225,7 @@ Tcl_LimitGetGranularity( static void DeleteScriptLimitCallback( - ClientData clientData) + void *clientData) { ScriptLimitCallback *limitCBPtr = (ScriptLimitCallback *)clientData; @@ -4256,7 +4256,7 @@ DeleteScriptLimitCallback( static void CallScriptLimitCallback( - ClientData clientData, + void *clientData, TCL_UNUSED(Tcl_Interp *)) { ScriptLimitCallback *limitCBPtr = (ScriptLimitCallback *)clientData; @@ -4508,7 +4508,7 @@ ChildCommandLimitCmd( if (interp == childInterp) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "limits on current interpreter inaccessible", -1)); + "limits on current interpreter inaccessible", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "OPERATION", "INTERP", "SELF", NULL); return TCL_ERROR; } @@ -4523,7 +4523,7 @@ ChildCommandLimitCmd( if (hPtr != NULL) { limitCBPtr = (ScriptLimitCallback *)Tcl_GetHashValue(hPtr); if (limitCBPtr != NULL && limitCBPtr->scriptObj != NULL) { - Tcl_DictObjPut(NULL, dictPtr, Tcl_NewStringObj(options[0], -1), + Tcl_DictObjPut(NULL, dictPtr, Tcl_NewStringObj(options[0], TCL_INDEX_NONE), limitCBPtr->scriptObj); } else { goto putEmptyCommandInDict; @@ -4534,21 +4534,21 @@ ChildCommandLimitCmd( putEmptyCommandInDict: TclNewObj(empty); Tcl_DictObjPut(NULL, dictPtr, - Tcl_NewStringObj(options[0], -1), empty); + Tcl_NewStringObj(options[0], TCL_INDEX_NONE), empty); } - Tcl_DictObjPut(NULL, dictPtr, Tcl_NewStringObj(options[1], -1), + Tcl_DictObjPut(NULL, dictPtr, Tcl_NewStringObj(options[1], TCL_INDEX_NONE), Tcl_NewWideIntObj(Tcl_LimitGetGranularity(childInterp, TCL_LIMIT_COMMANDS))); if (Tcl_LimitTypeEnabled(childInterp, TCL_LIMIT_COMMANDS)) { - Tcl_DictObjPut(NULL, dictPtr, Tcl_NewStringObj(options[2], -1), + Tcl_DictObjPut(NULL, dictPtr, Tcl_NewStringObj(options[2], TCL_INDEX_NONE), Tcl_NewWideIntObj(Tcl_LimitGetCommands(childInterp))); } else { Tcl_Obj *empty; TclNewObj(empty); Tcl_DictObjPut(NULL, dictPtr, - Tcl_NewStringObj(options[2], -1), empty); + Tcl_NewStringObj(options[2], TCL_INDEX_NONE), empty); } Tcl_SetObjResult(interp, dictPtr); return TCL_OK; @@ -4607,7 +4607,7 @@ ChildCommandLimitCmd( } if (gran < 1) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "granularity must be at least 1", -1)); + "granularity must be at least 1", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "OPERATION", "INTERP", "BADVALUE", NULL); return TCL_ERROR; @@ -4624,7 +4624,7 @@ ChildCommandLimitCmd( } if (limit < 0) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "command limit value must be at least 0", -1)); + "command limit value must be at least 0", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "OPERATION", "INTERP", "BADVALUE", NULL); return TCL_ERROR; @@ -4696,7 +4696,7 @@ ChildTimeLimitCmd( if (interp == childInterp) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "limits on current interpreter inaccessible", -1)); + "limits on current interpreter inaccessible", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "OPERATION", "INTERP", "SELF", NULL); return TCL_ERROR; } @@ -4711,7 +4711,7 @@ ChildTimeLimitCmd( if (hPtr != NULL) { limitCBPtr = (ScriptLimitCallback *)Tcl_GetHashValue(hPtr); if (limitCBPtr != NULL && limitCBPtr->scriptObj != NULL) { - Tcl_DictObjPut(NULL, dictPtr, Tcl_NewStringObj(options[0], -1), + Tcl_DictObjPut(NULL, dictPtr, Tcl_NewStringObj(options[0], TCL_INDEX_NONE), limitCBPtr->scriptObj); } else { goto putEmptyCommandInDict; @@ -4721,9 +4721,9 @@ ChildTimeLimitCmd( putEmptyCommandInDict: TclNewObj(empty); Tcl_DictObjPut(NULL, dictPtr, - Tcl_NewStringObj(options[0], -1), empty); + Tcl_NewStringObj(options[0], TCL_INDEX_NONE), empty); } - Tcl_DictObjPut(NULL, dictPtr, Tcl_NewStringObj(options[1], -1), + Tcl_DictObjPut(NULL, dictPtr, Tcl_NewStringObj(options[1], TCL_INDEX_NONE), Tcl_NewWideIntObj(Tcl_LimitGetGranularity(childInterp, TCL_LIMIT_TIME))); @@ -4731,18 +4731,18 @@ ChildTimeLimitCmd( Tcl_Time limitMoment; Tcl_LimitGetTime(childInterp, &limitMoment); - Tcl_DictObjPut(NULL, dictPtr, Tcl_NewStringObj(options[2], -1), + Tcl_DictObjPut(NULL, dictPtr, Tcl_NewStringObj(options[2], TCL_INDEX_NONE), Tcl_NewWideIntObj(limitMoment.usec/1000)); - Tcl_DictObjPut(NULL, dictPtr, Tcl_NewStringObj(options[3], -1), + Tcl_DictObjPut(NULL, dictPtr, Tcl_NewStringObj(options[3], TCL_INDEX_NONE), Tcl_NewWideIntObj(limitMoment.sec)); } else { Tcl_Obj *empty; TclNewObj(empty); Tcl_DictObjPut(NULL, dictPtr, - Tcl_NewStringObj(options[2], -1), empty); + Tcl_NewStringObj(options[2], TCL_INDEX_NONE), empty); Tcl_DictObjPut(NULL, dictPtr, - Tcl_NewStringObj(options[3], -1), empty); + Tcl_NewStringObj(options[3], TCL_INDEX_NONE), empty); } Tcl_SetObjResult(interp, dictPtr); return TCL_OK; @@ -4816,7 +4816,7 @@ ChildTimeLimitCmd( } if (gran < 1) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "granularity must be at least 1", -1)); + "granularity must be at least 1", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "OPERATION", "INTERP", "BADVALUE", NULL); return TCL_ERROR; @@ -4870,7 +4870,7 @@ ChildTimeLimitCmd( if (secObj != NULL && secLen == 0 && milliLen > 0) { Tcl_SetObjResult(interp, Tcl_NewStringObj( "may only set -milliseconds if -seconds is not " - "also being reset", -1)); + "also being reset", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "OPERATION", "INTERP", "BADUSAGE", NULL); return TCL_ERROR; @@ -4878,7 +4878,7 @@ ChildTimeLimitCmd( if (milliLen == 0 && (secObj == NULL || secLen > 0)) { Tcl_SetObjResult(interp, Tcl_NewStringObj( "may only reset -milliseconds if -seconds is " - "also being reset", -1)); + "also being reset", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "OPERATION", "INTERP", "BADUSAGE", NULL); return TCL_ERROR; diff --git a/generic/tclNamesp.c b/generic/tclNamesp.c index 65e2a77..924ffd5 100644 --- a/generic/tclNamesp.c +++ b/generic/tclNamesp.c @@ -71,26 +71,26 @@ typedef struct { * Declarations for functions local to this file: */ -static void DeleteImportedCmd(ClientData clientData); +static void DeleteImportedCmd(void *clientData); static int DoImport(Tcl_Interp *interp, Namespace *nsPtr, Tcl_HashEntry *hPtr, const char *cmdName, const char *pattern, Namespace *importNsPtr, int allowOverwrite); static void DupNsNameInternalRep(Tcl_Obj *objPtr,Tcl_Obj *copyPtr); -static char * ErrorCodeRead(ClientData clientData,Tcl_Interp *interp, +static char * ErrorCodeRead(void *clientData,Tcl_Interp *interp, const char *name1, const char *name2, int flags); -static char * ErrorInfoRead(ClientData clientData,Tcl_Interp *interp, +static char * ErrorInfoRead(void *clientData,Tcl_Interp *interp, const char *name1, const char *name2, int flags); -static char * EstablishErrorCodeTraces(ClientData clientData, +static char * EstablishErrorCodeTraces(void *clientData, Tcl_Interp *interp, const char *name1, const char *name2, int flags); -static char * EstablishErrorInfoTraces(ClientData clientData, +static char * EstablishErrorInfoTraces(void *clientData, Tcl_Interp *interp, const char *name1, const char *name2, int flags); static void FreeNsNameInternalRep(Tcl_Obj *objPtr); static int GetNamespaceFromObj(Tcl_Interp *interp, Tcl_Obj *objPtr, Tcl_Namespace **nsPtrPtr); -static int InvokeImportedNRCmd(ClientData clientData, +static int InvokeImportedNRCmd(void *clientData, Tcl_Interp *interp,int objc,Tcl_Obj *const objv[]); static Tcl_ObjCmdProc NamespaceChildrenCmd; static Tcl_ObjCmdProc NamespaceCodeCmd; @@ -653,7 +653,7 @@ Tcl_CreateNamespace( const char *name, /* Name for the new namespace. May be a * qualified name with names of ancestor * namespaces separated by "::"s. */ - ClientData clientData, /* One-word value to store with namespace. */ + void *clientData, /* One-word value to store with namespace. */ Tcl_NamespaceDeleteProc *deleteProc) /* Function called to delete client data when * the namespace is deleted. NULL if no @@ -698,7 +698,7 @@ Tcl_CreateNamespace( if (deleteProc != NULL) { nameStr = name + strlen(name) - 2; if (nameStr >= name && nameStr[1] == ':' && nameStr[0] == ':') { - Tcl_DStringAppend(&tmpBuffer, name, -1); + Tcl_DStringAppend(&tmpBuffer, name, TCL_INDEX_NONE); while ((nameLen = Tcl_DStringLength(&tmpBuffer)) > 0 && Tcl_DStringValue(&tmpBuffer)[nameLen-1] == ':') { Tcl_DStringSetLength(&tmpBuffer, nameLen-1); @@ -715,7 +715,7 @@ Tcl_CreateNamespace( if (*name == '\0') { Tcl_SetObjResult(interp, Tcl_NewStringObj("can't create namespace" - " \"\": only global namespace can have empty name", -1)); + " \"\": only global namespace can have empty name", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "OPERATION", "NAMESPACE", "CREATEGLOBAL", NULL); Tcl_DStringFree(&tmpBuffer); @@ -833,7 +833,7 @@ Tcl_CreateNamespace( Tcl_DString *tempPtr = namePtr; TclDStringAppendLiteral(buffPtr, "::"); - Tcl_DStringAppend(buffPtr, ancestorPtr->name, -1); + Tcl_DStringAppend(buffPtr, ancestorPtr->name, TCL_INDEX_NONE); TclDStringAppendDString(buffPtr, namePtr); /* @@ -1542,7 +1542,7 @@ Tcl_AppendExportList( for (i = 0; i < nsPtr->numExportPatterns; i++) { result = Tcl_ListObjAppendElement(interp, objPtr, - Tcl_NewStringObj(nsPtr->exportArrayPtr[i], -1)); + Tcl_NewStringObj(nsPtr->exportArrayPtr[i], TCL_INDEX_NONE)); if (result != TCL_OK) { return result; } @@ -1621,7 +1621,7 @@ Tcl_Import( int result; TclNewLiteralStringObj(objv[0], "auto_import"); - objv[1] = Tcl_NewStringObj(pattern, -1); + objv[1] = Tcl_NewStringObj(pattern, TCL_INDEX_NONE); Tcl_IncrRefCount(objv[0]); Tcl_IncrRefCount(objv[1]); @@ -1762,11 +1762,11 @@ DoImport( ImportRef *refPtr; Tcl_DStringInit(&ds); - Tcl_DStringAppend(&ds, nsPtr->fullName, -1); + Tcl_DStringAppend(&ds, nsPtr->fullName, TCL_INDEX_NONE); if (nsPtr != ((Interp *) interp)->globalNsPtr) { TclDStringAppendLiteral(&ds, "::"); } - Tcl_DStringAppend(&ds, cmdName, -1); + Tcl_DStringAppend(&ds, cmdName, TCL_INDEX_NONE); /* * Check whether creating the new imported command in the current @@ -2036,7 +2036,7 @@ TclGetOriginalCommand( static int InvokeImportedNRCmd( - ClientData clientData, /* Points to the imported command's + void *clientData, /* Points to the imported command's * ImportedCmdData structure. */ Tcl_Interp *interp, /* Current interpreter. */ int objc, /* Number of arguments. */ @@ -2051,7 +2051,7 @@ InvokeImportedNRCmd( int TclInvokeImportedCmd( - ClientData clientData, /* Points to the imported command's + void *clientData, /* Points to the imported command's * ImportedCmdData structure. */ Tcl_Interp *interp, /* Current interpreter. */ int objc, /* Number of arguments. */ @@ -2084,7 +2084,7 @@ TclInvokeImportedCmd( static void DeleteImportedCmd( - ClientData clientData) /* Points to the imported command's + void *clientData) /* Points to the imported command's * ImportedCmdData structure. */ { ImportedCmdData *dataPtr = (ImportedCmdData *)clientData; @@ -3049,11 +3049,11 @@ NamespaceChildrenCmd( if ((*name == ':') && (*(name+1) == ':')) { pattern = name; } else { - Tcl_DStringAppend(&buffer, nsPtr->fullName, -1); + Tcl_DStringAppend(&buffer, nsPtr->fullName, TCL_INDEX_NONE); if (nsPtr != globalNsPtr) { TclDStringAppendLiteral(&buffer, "::"); } - Tcl_DStringAppend(&buffer, name, -1); + Tcl_DStringAppend(&buffer, name, TCL_INDEX_NONE); pattern = Tcl_DStringValue(&buffer); } } @@ -3079,7 +3079,7 @@ NamespaceChildrenCmd( #endif ) { Tcl_ListObjAppendElement(interp, listPtr, - Tcl_NewStringObj(pattern, -1)); + Tcl_NewStringObj(pattern, TCL_INDEX_NONE)); } goto searchDone; } @@ -3095,7 +3095,7 @@ NamespaceChildrenCmd( childNsPtr = (Namespace *)Tcl_GetHashValue(entryPtr); if ((pattern == NULL) || Tcl_StringMatch(childNsPtr->fullName, pattern)) { - elemPtr = Tcl_NewStringObj(childNsPtr->fullName, -1); + elemPtr = Tcl_NewStringObj(childNsPtr->fullName, TCL_INDEX_NONE); Tcl_ListObjAppendElement(interp, listPtr, elemPtr); } entryPtr = Tcl_NextHashEntry(&search); @@ -3185,7 +3185,7 @@ NamespaceCodeCmd( if (currNsPtr == (Namespace *) TclGetGlobalNamespace(interp)) { TclNewLiteralStringObj(objPtr, "::"); } else { - objPtr = Tcl_NewStringObj(currNsPtr->fullName, -1); + objPtr = Tcl_NewStringObj(currNsPtr->fullName, TCL_INDEX_NONE); } Tcl_ListObjAppendElement(interp, listPtr, objPtr); @@ -3243,7 +3243,7 @@ NamespaceCurrentCmd( if (currNsPtr == (Namespace *) TclGetGlobalNamespace(interp)) { Tcl_SetObjResult(interp, Tcl_NewStringObj("::", 2)); } else { - Tcl_SetObjResult(interp, Tcl_NewStringObj(currNsPtr->fullName, -1)); + Tcl_SetObjResult(interp, Tcl_NewStringObj(currNsPtr->fullName, TCL_INDEX_NONE)); } return TCL_OK; } @@ -3358,7 +3358,7 @@ NamespaceDeleteCmd( static int NamespaceEvalCmd( - ClientData clientData, /* Arbitrary value passed to cmd. */ + void *clientData, /* Arbitrary value passed to cmd. */ Tcl_Interp *interp, /* Current interpreter. */ int objc, /* Number of arguments. */ Tcl_Obj *const objv[]) /* Argument objects. */ @@ -3451,7 +3451,7 @@ NRNamespaceEvalCmd( static int NsEval_Callback( - ClientData data[], + void *data[], Tcl_Interp *interp, int result) { @@ -3807,7 +3807,7 @@ NamespaceImportCmd( static int NamespaceInscopeCmd( - ClientData clientData, /* Arbitrary value passed to cmd. */ + void *clientData, /* Arbitrary value passed to cmd. */ Tcl_Interp *interp, /* Current interpreter. */ int objc, /* Number of arguments. */ Tcl_Obj *const objv[]) /* Argument objects. */ @@ -3999,7 +3999,7 @@ NamespaceParentCmd( if (nsPtr->parentPtr != NULL) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - nsPtr->parentPtr->fullName, -1)); + nsPtr->parentPtr->fullName, TCL_INDEX_NONE)); } return TCL_OK; } @@ -4060,7 +4060,7 @@ NamespacePathCmd( for (i=0 ; icommandPathLength ; i++) { if (nsPtr->commandPathArray[i].nsPtr != NULL) { Tcl_ListObjAppendElement(NULL, resultObj, Tcl_NewStringObj( - nsPtr->commandPathArray[i].nsPtr->fullName, -1)); + nsPtr->commandPathArray[i].nsPtr->fullName, TCL_INDEX_NONE)); } } Tcl_SetObjResult(interp, resultObj); @@ -4544,7 +4544,7 @@ NamespaceTailCmd( } if (p >= name) { - Tcl_SetObjResult(interp, Tcl_NewStringObj(p, -1)); + Tcl_SetObjResult(interp, Tcl_NewStringObj(p, TCL_INDEX_NONE)); } return TCL_OK; } diff --git a/generic/tclOO.c b/generic/tclOO.c index 0d9c7da..bee06e2 100644 --- a/generic/tclOO.c +++ b/generic/tclOO.c @@ -67,9 +67,9 @@ static int CloneClassMethod(Tcl_Interp *interp, Class *clsPtr, Method **newMPtrPtr); static int CloneObjectMethod(Tcl_Interp *interp, Object *oPtr, Method *mPtr, Tcl_Obj *namePtr); -static void DeletedDefineNamespace(ClientData clientData); -static void DeletedObjdefNamespace(ClientData clientData); -static void DeletedHelpersNamespace(ClientData clientData); +static void DeletedDefineNamespace(void *clientData); +static void DeletedObjdefNamespace(void *clientData); +static void DeletedHelpersNamespace(void *clientData); static Tcl_NRPostProc FinalizeAlloc; static Tcl_NRPostProc FinalizeNext; static Tcl_NRPostProc FinalizeObjectCall; @@ -78,23 +78,23 @@ static void InitClassSystemRoots(Tcl_Interp *interp, Foundation *fPtr); static int InitFoundation(Tcl_Interp *interp); static Tcl_InterpDeleteProc KillFoundation; -static void MyDeleted(ClientData clientData); -static void ObjectNamespaceDeleted(ClientData clientData); +static void MyDeleted(void *clientData); +static void ObjectNamespaceDeleted(void *clientData); static Tcl_CommandTraceProc ObjectRenamedTrace; -static inline void RemoveClass(Class **list, int num, int idx); -static inline void RemoveObject(Object **list, int num, int idx); +static inline void RemoveClass(Class **list, size_t num, size_t idx); +static inline void RemoveObject(Object **list, size_t num, size_t idx); static inline void SquelchCachedName(Object *oPtr); -static int PublicNRObjectCmd(ClientData clientData, +static int PublicNRObjectCmd(void *clientData, Tcl_Interp *interp, int objc, Tcl_Obj *const *objv); -static int PrivateNRObjectCmd(ClientData clientData, +static int PrivateNRObjectCmd(void *clientData, Tcl_Interp *interp, int objc, Tcl_Obj *const *objv); -static int MyClassNRObjCmd(ClientData clientData, +static int MyClassNRObjCmd(void *clientData, Tcl_Interp *interp, int objc, Tcl_Obj *const *objv); -static void MyClassDeleted(ClientData clientData); +static void MyClassDeleted(void *clientData); /* * Methods in the oo::object and oo::class classes. First, we define a helper @@ -201,10 +201,10 @@ MODULE_SCOPE const TclOOStubs tclOOStubs; static inline void RemoveClass( Class **list, - int num, - int idx) + size_t num, + size_t idx) { - for (; idx < num - 1; idx++) { + for (; idx + 1 < num; idx++) { list[idx] = list[idx + 1]; } list[idx] = NULL; @@ -213,10 +213,10 @@ RemoveClass( static inline void RemoveObject( Object **list, - int num, - int idx) + size_t num, + size_t idx) { - for (; idx < num - 1; idx++) { + for (; idx + 1 < num; idx++) { list[idx] = list[idx + 1]; } list[idx] = NULL; @@ -256,7 +256,7 @@ TclOOInit( * to be fully provided. */ - if (Tcl_EvalEx(interp, initScript, -1, 0) != TCL_OK) { + if (Tcl_EvalEx(interp, initScript, TCL_INDEX_NONE, 0) != TCL_OK) { return TCL_ERROR; } @@ -352,14 +352,14 @@ InitFoundation( Tcl_DStringInit(&buffer); for (i = 0 ; defineCmds[i].name ; i++) { TclDStringAppendLiteral(&buffer, "::oo::define::"); - Tcl_DStringAppend(&buffer, defineCmds[i].name, -1); + Tcl_DStringAppend(&buffer, defineCmds[i].name, TCL_INDEX_NONE); Tcl_CreateObjCommand(interp, Tcl_DStringValue(&buffer), defineCmds[i].objProc, INT2PTR(defineCmds[i].flag), NULL); Tcl_DStringFree(&buffer); } for (i = 0 ; objdefCmds[i].name ; i++) { TclDStringAppendLiteral(&buffer, "::oo::objdefine::"); - Tcl_DStringAppend(&buffer, objdefCmds[i].name, -1); + Tcl_DStringAppend(&buffer, objdefCmds[i].name, TCL_INDEX_NONE); Tcl_CreateObjCommand(interp, Tcl_DStringValue(&buffer), objdefCmds[i].objProc, INT2PTR(objdefCmds[i].flag), NULL); Tcl_DStringFree(&buffer); @@ -429,7 +429,7 @@ InitFoundation( * Evaluate the remaining definitions, which are a compiled-in Tcl script. */ - return Tcl_EvalEx(interp, tclOOSetupScript, -1, 0); + return Tcl_EvalEx(interp, tclOOSetupScript, TCL_INDEX_NONE, 0); } /* @@ -535,7 +535,7 @@ InitClassSystemRoots( static void DeletedDefineNamespace( - ClientData clientData) + void *clientData) { Foundation *fPtr = (Foundation *)clientData; @@ -544,7 +544,7 @@ DeletedDefineNamespace( static void DeletedObjdefNamespace( - ClientData clientData) + void *clientData) { Foundation *fPtr = (Foundation *)clientData; @@ -553,7 +553,7 @@ DeletedObjdefNamespace( static void DeletedHelpersNamespace( - ClientData clientData) + void *clientData) { Foundation *fPtr = (Foundation *)clientData; @@ -789,7 +789,7 @@ SquelchCachedName( static void MyDeleted( - ClientData clientData) /* Reference to the object whose [my] has been + void *clientData) /* Reference to the object whose [my] has been * squelched. */ { Object *oPtr = (Object *)clientData; @@ -799,7 +799,7 @@ MyDeleted( static void MyClassDeleted( - ClientData clientData) + void *clientData) { Object *oPtr = (Object *)clientData; oPtr->myclassCommand = NULL; @@ -820,7 +820,7 @@ MyClassDeleted( static void ObjectRenamedTrace( - ClientData clientData, /* The object being deleted. */ + void *clientData, /* The object being deleted. */ TCL_UNUSED(Tcl_Interp *), TCL_UNUSED(const char *) /*oldName*/, TCL_UNUSED(const char *) /*newName*/, @@ -1038,7 +1038,7 @@ TclOOReleaseClassContents( if (clsPtr->metadataPtr != NULL) { Tcl_ObjectMetadataType *metadataTypePtr; - ClientData value; + void *value; FOREACH_HASH(metadataTypePtr, value, clsPtr->metadataPtr) { metadataTypePtr->deleteProc(value); @@ -1110,7 +1110,7 @@ TclOOReleaseClassContents( static void ObjectNamespaceDeleted( - ClientData clientData) /* Pointer to the class whose namespace is + void *clientData) /* Pointer to the class whose namespace is * being deleted. */ { Object *oPtr = (Object *)clientData; @@ -1261,7 +1261,7 @@ ObjectNamespaceDeleted( if (oPtr->metadataPtr != NULL) { Tcl_ObjectMetadataType *metadataTypePtr; - ClientData value; + void *value; FOREACH_HASH(metadataTypePtr, value, oPtr->metadataPtr) { metadataTypePtr->deleteProc(value); @@ -1675,7 +1675,7 @@ Tcl_NewObjectInstance( { Class *classPtr = (Class *) cls; Object *oPtr; - ClientData clientData[4]; + void *clientData[4]; oPtr = TclNewObjectInstanceCommon(interp, classPtr, nameStr, nsNameStr); if (oPtr == NULL) { @@ -1854,7 +1854,7 @@ TclNewObjectInstanceCommon( static int FinalizeAlloc( - ClientData data[], + void *data[], Tcl_Interp *interp, int result) { @@ -1870,7 +1870,7 @@ FinalizeAlloc( if (result != TCL_ERROR && Destructing(oPtr)) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "object deleted in constructor", -1)); + "object deleted in constructor", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "OO", "STILLBORN", NULL); result = TCL_ERROR; } @@ -1941,7 +1941,7 @@ Tcl_CopyObjectInstance( if (IsRootClass(oPtr)) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "may not clone the class of classes", -1)); + "may not clone the class of classes", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "OO", "CLONING_CLASS", NULL); return NULL; } @@ -1951,8 +1951,8 @@ Tcl_CopyObjectInstance( */ o2Ptr = (Object *) Tcl_NewObjectInstance(interp, - (Tcl_Class) oPtr->selfCls, targetName, targetNamespaceName, -1, - NULL, -1); + (Tcl_Class) oPtr->selfCls, targetName, targetNamespaceName, TCL_INDEX_NONE, + NULL, TCL_INDEX_NONE); if (o2Ptr == NULL) { return NULL; } @@ -2037,7 +2037,7 @@ Tcl_CopyObjectInstance( if (oPtr->metadataPtr != NULL) { Tcl_ObjectMetadataType *metadataTypePtr; - ClientData value, duplicate; + void *value, *duplicate; FOREACH_HASH(metadataTypePtr, value, oPtr->metadataPtr) { if (metadataTypePtr->cloneProc == NULL) { @@ -2182,7 +2182,7 @@ Tcl_CopyObjectInstance( if (clsPtr->metadataPtr != NULL) { Tcl_ObjectMetadataType *metadataTypePtr; - ClientData value, duplicate; + void *value, *duplicate; FOREACH_HASH(metadataTypePtr, value, clsPtr->metadataPtr) { if (metadataTypePtr->cloneProc == NULL) { @@ -2254,7 +2254,7 @@ CloneObjectMethod( TclNewInstanceMethod(interp, (Tcl_Object) oPtr, namePtr, mPtr->flags & PUBLIC_METHOD, NULL, NULL); } else if (mPtr->typePtr->cloneProc) { - ClientData newClientData; + void *newClientData; if (mPtr->typePtr->cloneProc(interp, mPtr->clientData, &newClientData) != TCL_OK) { @@ -2283,7 +2283,7 @@ CloneClassMethod( m2Ptr = (Method *) TclNewMethod(interp, (Tcl_Class) clsPtr, namePtr, mPtr->flags & PUBLIC_METHOD, NULL, NULL); } else if (mPtr->typePtr->cloneProc) { - ClientData newClientData; + void *newClientData; if (mPtr->typePtr->cloneProc(interp, mPtr->clientData, &newClientData) != TCL_OK) { @@ -2329,7 +2329,7 @@ CloneClassMethod( * ---------------------------------------------------------------------- */ -ClientData +void * Tcl_ClassGetMetadata( Tcl_Class clazz, const Tcl_ObjectMetadataType *typePtr) @@ -2366,7 +2366,7 @@ void Tcl_ClassSetMetadata( Tcl_Class clazz, const Tcl_ObjectMetadataType *typePtr, - ClientData metadata) + void *metadata) { Class *clsPtr = (Class *) clazz; Tcl_HashEntry *hPtr; @@ -2409,7 +2409,7 @@ Tcl_ClassSetMetadata( Tcl_SetHashValue(hPtr, metadata); } -ClientData +void * Tcl_ObjectGetMetadata( Tcl_Object object, const Tcl_ObjectMetadataType *typePtr) @@ -2446,7 +2446,7 @@ void Tcl_ObjectSetMetadata( Tcl_Object object, const Tcl_ObjectMetadataType *typePtr, - ClientData metadata) + void *metadata) { Object *oPtr = (Object *) object; Tcl_HashEntry *hPtr; @@ -2504,7 +2504,7 @@ Tcl_ObjectSetMetadata( int TclOOPublicObjectCmd( - ClientData clientData, + void *clientData, Tcl_Interp *interp, int objc, Tcl_Obj *const *objv) @@ -2514,7 +2514,7 @@ TclOOPublicObjectCmd( static int PublicNRObjectCmd( - ClientData clientData, + void *clientData, Tcl_Interp *interp, int objc, Tcl_Obj *const *objv) @@ -2525,7 +2525,7 @@ PublicNRObjectCmd( int TclOOPrivateObjectCmd( - ClientData clientData, + void *clientData, Tcl_Interp *interp, int objc, Tcl_Obj *const *objv) @@ -2535,7 +2535,7 @@ TclOOPrivateObjectCmd( static int PrivateNRObjectCmd( - ClientData clientData, + void *clientData, Tcl_Interp *interp, int objc, Tcl_Obj *const *objv) @@ -2586,7 +2586,7 @@ TclOOInvokeObject( int TclOOMyClassObjCmd( - ClientData clientData, + void *clientData, Tcl_Interp *interp, int objc, Tcl_Obj *const *objv) @@ -2596,7 +2596,7 @@ TclOOMyClassObjCmd( static int MyClassNRObjCmd( - ClientData clientData, + void *clientData, Tcl_Interp *interp, int objc, Tcl_Obj *const *objv) @@ -2749,7 +2749,7 @@ TclOOObjectCmdCore( } if (contextPtr->index >= contextPtr->callPtr->numChain) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "no valid method implementation", -1)); + "no valid method implementation", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "LOOKUP", "METHOD", TclGetString(methodNamePtr), NULL); TclOODeleteContext(contextPtr); @@ -2768,7 +2768,7 @@ TclOOObjectCmdCore( static int FinalizeObjectCall( - ClientData data[], + void *data[], TCL_UNUSED(Tcl_Interp *), int result) { @@ -2929,7 +2929,7 @@ TclNRObjectContextInvokeNext( static int FinalizeNext( - ClientData data[], + void *data[], TCL_UNUSED(Tcl_Interp *), int result) { diff --git a/generic/tclOOBasic.c b/generic/tclOOBasic.c index ef554d7..d8ef59b 100644 --- a/generic/tclOOBasic.c +++ b/generic/tclOOBasic.c @@ -99,10 +99,10 @@ TclOO_Class_Constructor( * here (and the class definition delegate doesn't run any constructors). */ - nameObj = Tcl_NewStringObj(oPtr->namespacePtr->fullName, -1); - Tcl_AppendToObj(nameObj, ":: oo ::delegate", -1); + nameObj = Tcl_NewStringObj(oPtr->namespacePtr->fullName, TCL_INDEX_NONE); + Tcl_AppendToObj(nameObj, ":: oo ::delegate", TCL_INDEX_NONE); Tcl_NewObjectInstance(interp, (Tcl_Class) oPtr->fPtr->classCls, - TclGetString(nameObj), NULL, -1, NULL, -1); + TclGetString(nameObj), NULL, TCL_INDEX_NONE, NULL, TCL_INDEX_NONE); Tcl_DecrRefCount(nameObj); /* @@ -147,7 +147,7 @@ DecrRefsPostClassConstructor( TclDecrRefCount(invoke[0]); TclDecrRefCount(invoke[1]); TclDecrRefCount(invoke[2]); - invoke[0] = Tcl_NewStringObj("::oo::MixinClassDelegates", -1); + invoke[0] = Tcl_NewStringObj("::oo::MixinClassDelegates", TCL_INDEX_NONE); invoke[1] = TclOOObjectName(interp, oPtr); Tcl_IncrRefCount(invoke[0]); Tcl_IncrRefCount(invoke[1]); @@ -213,7 +213,7 @@ TclOO_Class_Create( objv[Tcl_ObjectContextSkippedArgs(context)], &len); if (len == 0) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "object name must not be empty", -1)); + "object name must not be empty", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "OO", "EMPTY_NAME", NULL); return TCL_ERROR; } @@ -278,7 +278,7 @@ TclOO_Class_CreateNs( objv[Tcl_ObjectContextSkippedArgs(context)], &len); if (len == 0) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "object name must not be empty", -1)); + "object name must not be empty", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "OO", "EMPTY_NAME", NULL); return TCL_ERROR; } @@ -286,7 +286,7 @@ TclOO_Class_CreateNs( objv[Tcl_ObjectContextSkippedArgs(context)+1], &len); if (len == 0) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "namespace name must not be empty", -1)); + "namespace name must not be empty", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "OO", "EMPTY_NAME", NULL); return TCL_ERROR; } @@ -598,14 +598,14 @@ TclOO_Object_Unknown( TclGetString(objv[skip])); for (i=0 ; ifullName, -1); + varNamePtr = Tcl_NewStringObj(namespacePtr->fullName, TCL_INDEX_NONE); Tcl_AppendToObj(varNamePtr, "::", 2); Tcl_AppendObjToObj(varNamePtr, argPtr); } @@ -840,10 +840,10 @@ TclOO_Object_VarName( * WARNING! This code pokes inside the implementation of hash tables! */ - Tcl_AppendToObj(varNamePtr, "(", -1); + Tcl_AppendToObj(varNamePtr, "(", TCL_INDEX_NONE); Tcl_AppendObjToObj(varNamePtr, ((VarInHash *) varPtr)->entry.key.objPtr); - Tcl_AppendToObj(varNamePtr, ")", -1); + Tcl_AppendToObj(varNamePtr, ")", TCL_INDEX_NONE); } else { Tcl_GetVariableFullName(interp, (Tcl_Var) varPtr, varNamePtr); } @@ -1097,7 +1097,7 @@ TclOOSelfObjCmd( if (clsPtr == NULL) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "method not defined by a class", -1)); + "method not defined by a class", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "OO", "UNMATCHED_CONTEXT", NULL); return TCL_ERROR; } @@ -1118,7 +1118,7 @@ TclOOSelfObjCmd( case SELF_FILTER: if (!CurrentlyInvoked(contextPtr).isFilter) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "not inside a filtering context", -1)); + "not inside a filtering context", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "OO", "UNMATCHED_CONTEXT", NULL); return TCL_ERROR; } else { @@ -1135,7 +1135,7 @@ TclOOSelfObjCmd( } result[0] = TclOOObjectName(interp, oPtr); - result[1] = Tcl_NewStringObj(type, -1); + result[1] = Tcl_NewStringObj(type, TCL_INDEX_NONE); result[2] = miPtr->mPtr->namePtr; Tcl_SetObjResult(interp, Tcl_NewListObj(3, result)); return TCL_OK; @@ -1144,7 +1144,7 @@ TclOOSelfObjCmd( if ((framePtr->callerVarPtr == NULL) || !(framePtr->callerVarPtr->isProcCallFrame & FRAME_IS_METHOD)){ Tcl_SetObjResult(interp, Tcl_NewStringObj( - "caller is not an object", -1)); + "caller is not an object", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "OO", "CONTEXT_REQUIRED", NULL); return TCL_ERROR; } else { @@ -1162,7 +1162,7 @@ TclOOSelfObjCmd( */ Tcl_SetObjResult(interp, Tcl_NewStringObj( - "method without declarer!", -1)); + "method without declarer!", TCL_INDEX_NONE)); return TCL_ERROR; } @@ -1194,7 +1194,7 @@ TclOOSelfObjCmd( */ Tcl_SetObjResult(interp, Tcl_NewStringObj( - "method without declarer!", -1)); + "method without declarer!", TCL_INDEX_NONE)); return TCL_ERROR; } @@ -1212,7 +1212,7 @@ TclOOSelfObjCmd( case SELF_TARGET: if (!CurrentlyInvoked(contextPtr).isFilter) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "not inside a filtering context", -1)); + "not inside a filtering context", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "OO", "UNMATCHED_CONTEXT", NULL); return TCL_ERROR; } else { @@ -1239,7 +1239,7 @@ TclOOSelfObjCmd( */ Tcl_SetObjResult(interp, Tcl_NewStringObj( - "method without declarer!", -1)); + "method without declarer!", TCL_INDEX_NONE)); return TCL_ERROR; } result[0] = TclOOObjectName(interp, declarerPtr); diff --git a/generic/tclOODefineCmds.c b/generic/tclOODefineCmds.c index 63aca58..796a22f 100644 --- a/generic/tclOODefineCmds.c +++ b/generic/tclOODefineCmds.c @@ -78,49 +78,49 @@ static inline void RecomputeClassCacheFlag(Object *oPtr); static int RenameDeleteMethod(Tcl_Interp *interp, Object *oPtr, int useClass, Tcl_Obj *const fromPtr, Tcl_Obj *const toPtr); -static int ClassFilterGet(ClientData clientData, +static int ClassFilterGet(void *clientData, Tcl_Interp *interp, Tcl_ObjectContext context, int objc, Tcl_Obj *const *objv); -static int ClassFilterSet(ClientData clientData, +static int ClassFilterSet(void *clientData, Tcl_Interp *interp, Tcl_ObjectContext context, int objc, Tcl_Obj *const *objv); -static int ClassMixinGet(ClientData clientData, +static int ClassMixinGet(void *clientData, Tcl_Interp *interp, Tcl_ObjectContext context, int objc, Tcl_Obj *const *objv); -static int ClassMixinSet(ClientData clientData, +static int ClassMixinSet(void *clientData, Tcl_Interp *interp, Tcl_ObjectContext context, int objc, Tcl_Obj *const *objv); -static int ClassSuperGet(ClientData clientData, +static int ClassSuperGet(void *clientData, Tcl_Interp *interp, Tcl_ObjectContext context, int objc, Tcl_Obj *const *objv); -static int ClassSuperSet(ClientData clientData, +static int ClassSuperSet(void *clientData, Tcl_Interp *interp, Tcl_ObjectContext context, int objc, Tcl_Obj *const *objv); -static int ClassVarsGet(ClientData clientData, +static int ClassVarsGet(void *clientData, Tcl_Interp *interp, Tcl_ObjectContext context, int objc, Tcl_Obj *const *objv); -static int ClassVarsSet(ClientData clientData, +static int ClassVarsSet(void *clientData, Tcl_Interp *interp, Tcl_ObjectContext context, int objc, Tcl_Obj *const *objv); -static int ObjFilterGet(ClientData clientData, +static int ObjFilterGet(void *clientData, Tcl_Interp *interp, Tcl_ObjectContext context, int objc, Tcl_Obj *const *objv); -static int ObjFilterSet(ClientData clientData, +static int ObjFilterSet(void *clientData, Tcl_Interp *interp, Tcl_ObjectContext context, int objc, Tcl_Obj *const *objv); -static int ObjMixinGet(ClientData clientData, +static int ObjMixinGet(void *clientData, Tcl_Interp *interp, Tcl_ObjectContext context, int objc, Tcl_Obj *const *objv); -static int ObjMixinSet(ClientData clientData, +static int ObjMixinSet(void *clientData, Tcl_Interp *interp, Tcl_ObjectContext context, int objc, Tcl_Obj *const *objv); -static int ObjVarsGet(ClientData clientData, +static int ObjVarsGet(void *clientData, Tcl_Interp *interp, Tcl_ObjectContext context, int objc, Tcl_Obj *const *objv); -static int ObjVarsSet(ClientData clientData, +static int ObjVarsSet(void *clientData, Tcl_Interp *interp, Tcl_ObjectContext context, int objc, Tcl_Obj *const *objv); -static int ResolveClass(ClientData clientData, +static int ResolveClass(void *clientData, Tcl_Interp *interp, Tcl_ObjectContext context, int objc, Tcl_Obj *const *objv); @@ -633,7 +633,7 @@ RenameDeleteMethod( if (hPtr == newHPtr) { renameToSelf: Tcl_SetObjResult(interp, Tcl_NewStringObj( - "cannot rename method to itself", -1)); + "cannot rename method to itself", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "OO", "RENAME_TO_SELF", NULL); return TCL_ERROR; } else if (!isNew) { @@ -709,7 +709,7 @@ TclOOUnknownDefinition( if (objc < 2) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "bad call of unknown handler", -1)); + "bad call of unknown handler", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "OO", "BAD_UNKNOWN", NULL); return TCL_ERROR; } @@ -743,7 +743,7 @@ TclOOUnknownDefinition( TclStackAlloc(interp, sizeof(Tcl_Obj*) * (objc - 1)); int result; - newObjv[0] = Tcl_NewStringObj(matchedStr, -1); + newObjv[0] = Tcl_NewStringObj(matchedStr, TCL_INDEX_NONE); Tcl_IncrRefCount(newObjv[0]); if (objc > 2) { memcpy(newObjv + 1, objv + 2, sizeof(Tcl_Obj *) * (objc - 2)); @@ -846,7 +846,7 @@ InitDefineContext( if (namespacePtr == NULL) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "no definition namespace available", -1)); + "no definition namespace available", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "OO", "MONKEY_BUSINESS", NULL); return TCL_ERROR; } @@ -887,7 +887,7 @@ TclOOGetDefineCmdContext( && iPtr->varFramePtr->isProcCallFrame != PRIVATE_FRAME)) { Tcl_SetObjResult(interp, Tcl_NewStringObj( "this command may only be called from within the context of" - " an ::oo::define or ::oo::objdefine command", -1)); + " an ::oo::define or ::oo::objdefine command", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "OO", "MONKEY_BUSINESS", NULL); return NULL; } @@ -895,7 +895,7 @@ TclOOGetDefineCmdContext( if (Tcl_ObjectDeleted(object)) { Tcl_SetObjResult(interp, Tcl_NewStringObj( "this command cannot be called when the object has been" - " deleted", -1)); + " deleted", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "OO", "MONKEY_BUSINESS", NULL); return NULL; } @@ -938,7 +938,7 @@ GetClassInOuterContext( return NULL; } if (oPtr->classPtr == NULL) { - Tcl_SetObjResult(interp, Tcl_NewStringObj(errMsg, -1)); + Tcl_SetObjResult(interp, Tcl_NewStringObj(errMsg, TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "LOOKUP", "CLASS", TclGetString(className), NULL); return NULL; @@ -1344,7 +1344,7 @@ TclOODefineObjSelfObjCmd( int TclOODefinePrivateObjCmd( - ClientData clientData, + void *clientData, Tcl_Interp *interp, int objc, Tcl_Obj *const *objv) @@ -1437,13 +1437,13 @@ TclOODefineClassObjCmd( } if (oPtr->flags & ROOT_OBJECT) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "may not modify the class of the root object class", -1)); + "may not modify the class of the root object class", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "OO", "MONKEY_BUSINESS", NULL); return TCL_ERROR; } if (oPtr->flags & ROOT_CLASS) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "may not modify the class of the class of classes", -1)); + "may not modify the class of the class of classes", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "OO", "MONKEY_BUSINESS", NULL); return TCL_ERROR; } @@ -1463,7 +1463,7 @@ TclOODefineClassObjCmd( } if (oPtr == clsPtr->thisPtr) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "may not change classes into an instance of themselves", -1)); + "may not change classes into an instance of themselves", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "OO", "MONKEY_BUSINESS", NULL); return TCL_ERROR; } @@ -1616,7 +1616,7 @@ TclOODefineDefnNsObjCmd( } if (!oPtr->classPtr) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "attempt to misuse API", -1)); + "attempt to misuse API", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "OO", "MONKEY_BUSINESS", NULL); return TCL_ERROR; } @@ -1647,7 +1647,7 @@ TclOODefineDefnNsObjCmd( if (nsPtr == NULL) { return TCL_ERROR; } - nsNamePtr = Tcl_NewStringObj(nsPtr->fullName, -1); + nsNamePtr = Tcl_NewStringObj(nsPtr->fullName, TCL_INDEX_NONE); Tcl_IncrRefCount(nsNamePtr); } @@ -1680,7 +1680,7 @@ TclOODefineDefnNsObjCmd( int TclOODefineDeleteMethodObjCmd( - ClientData clientData, + void *clientData, Tcl_Interp *interp, int objc, Tcl_Obj *const *objv) @@ -1700,7 +1700,7 @@ TclOODefineDeleteMethodObjCmd( } if (!isInstanceDeleteMethod && !oPtr->classPtr) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "attempt to misuse API", -1)); + "attempt to misuse API", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "OO", "MONKEY_BUSINESS", NULL); return TCL_ERROR; } @@ -1802,7 +1802,7 @@ TclOODefineDestructorObjCmd( int TclOODefineExportObjCmd( - ClientData clientData, + void *clientData, Tcl_Interp *interp, int objc, Tcl_Obj *const *objv) @@ -1826,7 +1826,7 @@ TclOODefineExportObjCmd( clsPtr = oPtr->classPtr; if (!isInstanceExport && !clsPtr) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "attempt to misuse API", -1)); + "attempt to misuse API", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "OO", "MONKEY_BUSINESS", NULL); return TCL_ERROR; } @@ -1898,7 +1898,7 @@ TclOODefineExportObjCmd( int TclOODefineForwardObjCmd( - ClientData clientData, + void *clientData, Tcl_Interp *interp, int objc, Tcl_Obj *const *objv) @@ -1920,7 +1920,7 @@ TclOODefineForwardObjCmd( } if (!isInstanceForward && !oPtr->classPtr) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "attempt to misuse API", -1)); + "attempt to misuse API", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "OO", "MONKEY_BUSINESS", NULL); return TCL_ERROR; } @@ -1962,7 +1962,7 @@ TclOODefineForwardObjCmd( int TclOODefineMethodObjCmd( - ClientData clientData, + void *clientData, Tcl_Interp *interp, int objc, Tcl_Obj *const *objv) @@ -1998,7 +1998,7 @@ TclOODefineMethodObjCmd( } if (!isInstanceMethod && !oPtr->classPtr) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "attempt to misuse API", -1)); + "attempt to misuse API", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "OO", "MONKEY_BUSINESS", NULL); return TCL_ERROR; } @@ -2058,7 +2058,7 @@ TclOODefineMethodObjCmd( int TclOODefineRenameMethodObjCmd( - ClientData clientData, + void *clientData, Tcl_Interp *interp, int objc, Tcl_Obj *const *objv) @@ -2077,7 +2077,7 @@ TclOODefineRenameMethodObjCmd( } if (!isInstanceRenameMethod && !oPtr->classPtr) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "attempt to misuse API", -1)); + "attempt to misuse API", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "OO", "MONKEY_BUSINESS", NULL); return TCL_ERROR; } @@ -2115,7 +2115,7 @@ TclOODefineRenameMethodObjCmd( int TclOODefineUnexportObjCmd( - ClientData clientData, + void *clientData, Tcl_Interp *interp, int objc, Tcl_Obj *const *objv) @@ -2139,7 +2139,7 @@ TclOODefineUnexportObjCmd( clsPtr = oPtr->classPtr; if (!isInstanceUnexport && !clsPtr) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "attempt to misuse API", -1)); + "attempt to misuse API", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "OO", "MONKEY_BUSINESS", NULL); return TCL_ERROR; } @@ -2268,13 +2268,13 @@ TclOODefineSlots( Foundation *fPtr) { const struct DeclaredSlot *slotInfoPtr; - Tcl_Obj *getName = Tcl_NewStringObj("Get", -1); - Tcl_Obj *setName = Tcl_NewStringObj("Set", -1); - Tcl_Obj *resolveName = Tcl_NewStringObj("Resolve", -1); + Tcl_Obj *getName = Tcl_NewStringObj("Get", TCL_INDEX_NONE); + Tcl_Obj *setName = Tcl_NewStringObj("Set", TCL_INDEX_NONE); + Tcl_Obj *resolveName = Tcl_NewStringObj("Resolve", TCL_INDEX_NONE); Class *slotCls; slotCls = ((Object *) Tcl_NewObjectInstance(fPtr->interp, (Tcl_Class) - fPtr->classCls, "::oo::Slot", NULL, -1, NULL, 0))->classPtr; + fPtr->classCls, "::oo::Slot", NULL, TCL_INDEX_NONE, NULL, 0))->classPtr; if (slotCls == NULL) { return TCL_ERROR; } @@ -2283,7 +2283,7 @@ TclOODefineSlots( Tcl_IncrRefCount(resolveName); for (slotInfoPtr = slots ; slotInfoPtr->name ; slotInfoPtr++) { Tcl_Object slotObject = Tcl_NewObjectInstance(fPtr->interp, - (Tcl_Class) slotCls, slotInfoPtr->name, NULL, -1, NULL, 0); + (Tcl_Class) slotCls, slotInfoPtr->name, NULL, TCL_INDEX_NONE, NULL, 0); if (slotObject == NULL) { continue; @@ -2335,7 +2335,7 @@ ClassFilterGet( return TCL_ERROR; } else if (!oPtr->classPtr) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "attempt to misuse API", -1)); + "attempt to misuse API", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "OO", "MONKEY_BUSINESS", NULL); return TCL_ERROR; } @@ -2371,7 +2371,7 @@ ClassFilterSet( return TCL_ERROR; } else if (!oPtr->classPtr) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "attempt to misuse API", -1)); + "attempt to misuse API", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "OO", "MONKEY_BUSINESS", NULL); return TCL_ERROR; } else if (TclListObjGetElementsM(interp, objv[0], &filterc, @@ -2416,7 +2416,7 @@ ClassMixinGet( return TCL_ERROR; } else if (!oPtr->classPtr) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "attempt to misuse API", -1)); + "attempt to misuse API", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "OO", "MONKEY_BUSINESS", NULL); return TCL_ERROR; } @@ -2455,7 +2455,7 @@ ClassMixinSet( return TCL_ERROR; } else if (!oPtr->classPtr) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "attempt to misuse API", -1)); + "attempt to misuse API", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "OO", "MONKEY_BUSINESS", NULL); return TCL_ERROR; } else if (TclListObjGetElementsM(interp, objv[0], &mixinc, @@ -2474,7 +2474,7 @@ ClassMixinSet( } if (TclOOIsReachable(oPtr->classPtr, mixins[i])) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "may not mix a class into itself", -1)); + "may not mix a class into itself", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "OO", "SELF_MIXIN", NULL); goto freeAndError; } @@ -2522,7 +2522,7 @@ ClassSuperGet( return TCL_ERROR; } else if (!oPtr->classPtr) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "attempt to misuse API", -1)); + "attempt to misuse API", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "OO", "MONKEY_BUSINESS", NULL); return TCL_ERROR; } @@ -2561,12 +2561,12 @@ ClassSuperSet( return TCL_ERROR; } else if (!oPtr->classPtr) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "attempt to misuse API", -1)); + "attempt to misuse API", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "OO", "MONKEY_BUSINESS", NULL); return TCL_ERROR; } else if (oPtr == oPtr->fPtr->objectCls->thisPtr) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "may not modify the superclass of the root object", -1)); + "may not modify the superclass of the root object", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "OO", "MONKEY_BUSINESS", NULL); return TCL_ERROR; } else if (TclListObjGetElementsM(interp, objv[0], &superc, @@ -2614,7 +2614,7 @@ ClassSuperSet( } if (TclOOIsReachable(oPtr->classPtr, superclasses[i])) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "attempt to form circular dependency graph", -1)); + "attempt to form circular dependency graph", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "OO", "CIRCULARITY", NULL); failedAfterAlloc: for (; i-- > 0 ;) { @@ -2689,7 +2689,7 @@ ClassVarsGet( return TCL_ERROR; } else if (!oPtr->classPtr) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "attempt to misuse API", -1)); + "attempt to misuse API", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "OO", "MONKEY_BUSINESS", NULL); return TCL_ERROR; } @@ -2736,7 +2736,7 @@ ClassVarsSet( return TCL_ERROR; } else if (!oPtr->classPtr) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "attempt to misuse API", -1)); + "attempt to misuse API", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "OO", "MONKEY_BUSINESS", NULL); return TCL_ERROR; } else if (TclListObjGetElementsM(interp, objv[0], &varc, diff --git a/generic/tclOOInfo.c b/generic/tclOOInfo.c index b4f9c56..a49282c 100644 --- a/generic/tclOOInfo.c +++ b/generic/tclOOInfo.c @@ -120,10 +120,10 @@ TclOOInitInfo( infoCmd = Tcl_FindCommand(interp, "info", NULL, TCL_GLOBAL_ONLY); if (infoCmd) { Tcl_GetEnsembleMappingDict(NULL, infoCmd, &mapDict); - Tcl_DictObjPut(NULL, mapDict, Tcl_NewStringObj("object", -1), - Tcl_NewStringObj("::oo::InfoObject", -1)); - Tcl_DictObjPut(NULL, mapDict, Tcl_NewStringObj("class", -1), - Tcl_NewStringObj("::oo::InfoClass", -1)); + Tcl_DictObjPut(NULL, mapDict, Tcl_NewStringObj("object", TCL_INDEX_NONE), + Tcl_NewStringObj("::oo::InfoObject", TCL_INDEX_NONE)); + Tcl_DictObjPut(NULL, mapDict, Tcl_NewStringObj("class", TCL_INDEX_NONE), + Tcl_NewStringObj("::oo::InfoClass", TCL_INDEX_NONE)); Tcl_SetEnsembleMappingDict(interp, infoCmd, mapDict); } } @@ -264,7 +264,7 @@ InfoObjectDefnCmd( procPtr = TclOOGetProcFromMethod((Method *)Tcl_GetHashValue(hPtr)); if (procPtr == NULL) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "definition not available for this kind of method", -1)); + "definition not available for this kind of method", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "LOOKUP", "METHOD", TclGetString(objv[2]), NULL); return TCL_ERROR; @@ -278,7 +278,7 @@ InfoObjectDefnCmd( TclNewObj(argObj); Tcl_ListObjAppendElement(NULL, argObj, - Tcl_NewStringObj(localPtr->name, -1)); + Tcl_NewStringObj(localPtr->name, TCL_INDEX_NONE)); if (localPtr->defValuePtr != NULL) { Tcl_ListObjAppendElement(NULL, argObj, localPtr->defValuePtr); } @@ -610,7 +610,7 @@ InfoObjectMethodsCmd( for (i=0 ; i 0) { Tcl_Free((void *)names); @@ -679,7 +679,7 @@ InfoObjectMethodTypeCmd( goto unknownMethod; } - Tcl_SetObjResult(interp, Tcl_NewStringObj(mPtr->typePtr->name, -1)); + Tcl_SetObjResult(interp, Tcl_NewStringObj(mPtr->typePtr->name, TCL_INDEX_NONE)); return TCL_OK; } @@ -787,7 +787,7 @@ InfoObjectNsCmd( } Tcl_SetObjResult(interp, - Tcl_NewStringObj(oPtr->namespacePtr->fullName, -1)); + Tcl_NewStringObj(oPtr->namespacePtr->fullName, TCL_INDEX_NONE)); return TCL_OK; } @@ -943,7 +943,7 @@ InfoClassConstrCmd( procPtr = TclOOGetProcFromMethod(clsPtr->constructorPtr); if (procPtr == NULL) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "definition not available for this kind of method", -1)); + "definition not available for this kind of method", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "OO", "METHOD_TYPE", NULL); return TCL_ERROR; } @@ -956,7 +956,7 @@ InfoClassConstrCmd( TclNewObj(argObj); Tcl_ListObjAppendElement(NULL, argObj, - Tcl_NewStringObj(localPtr->name, -1)); + Tcl_NewStringObj(localPtr->name, TCL_INDEX_NONE)); if (localPtr->defValuePtr != NULL) { Tcl_ListObjAppendElement(NULL, argObj, localPtr->defValuePtr); } @@ -1010,7 +1010,7 @@ InfoClassDefnCmd( procPtr = TclOOGetProcFromMethod((Method *)Tcl_GetHashValue(hPtr)); if (procPtr == NULL) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "definition not available for this kind of method", -1)); + "definition not available for this kind of method", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "LOOKUP", "METHOD", TclGetString(objv[2]), NULL); return TCL_ERROR; @@ -1024,7 +1024,7 @@ InfoClassDefnCmd( TclNewObj(argObj); Tcl_ListObjAppendElement(NULL, argObj, - Tcl_NewStringObj(localPtr->name, -1)); + Tcl_NewStringObj(localPtr->name, TCL_INDEX_NONE)); if (localPtr->defValuePtr != NULL) { Tcl_ListObjAppendElement(NULL, argObj, localPtr->defValuePtr); } @@ -1121,7 +1121,7 @@ InfoClassDestrCmd( procPtr = TclOOGetProcFromMethod(clsPtr->destructorPtr); if (procPtr == NULL) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "definition not available for this kind of method", -1)); + "definition not available for this kind of method", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "OO", "METHOD_TYPE", NULL); return TCL_ERROR; } @@ -1365,7 +1365,7 @@ InfoClassMethodsCmd( for (i=0 ; i 0) { Tcl_Free((void *)names); @@ -1431,7 +1431,7 @@ InfoClassMethodTypeCmd( goto unknownMethod; } - Tcl_SetObjResult(interp, Tcl_NewStringObj(mPtr->typePtr->name, -1)); + Tcl_SetObjResult(interp, Tcl_NewStringObj(mPtr->typePtr->name, TCL_INDEX_NONE)); return TCL_OK; } @@ -1663,7 +1663,7 @@ InfoObjectCallCmd( NULL); if (contextPtr == NULL) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "cannot construct any call chain", -1)); + "cannot construct any call chain", TCL_INDEX_NONE)); return TCL_ERROR; } Tcl_SetObjResult(interp, @@ -1708,7 +1708,7 @@ InfoClassCallCmd( callPtr = TclOOGetStereotypeCallChain(clsPtr, objv[2], PUBLIC_METHOD); if (callPtr == NULL) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "cannot construct any call chain", -1)); + "cannot construct any call chain", TCL_INDEX_NONE)); return TCL_ERROR; } Tcl_SetObjResult(interp, TclOORenderCallChain(interp, callPtr)); diff --git a/generic/tclObj.c b/generic/tclObj.c index eaa6766..16b9ca1 100644 --- a/generic/tclObj.c +++ b/generic/tclObj.c @@ -867,7 +867,7 @@ Tcl_AppendAllObjTypes( for (hPtr = Tcl_FirstHashEntry(&typeTable, &search); hPtr != NULL; hPtr = Tcl_NextHashEntry(&search)) { Tcl_ListObjAppendElement(NULL, objPtr, - Tcl_NewStringObj((char *)Tcl_GetHashKey(&typeTable, hPtr), -1)); + Tcl_NewStringObj((char *)Tcl_GetHashKey(&typeTable, hPtr), TCL_INDEX_NONE)); } Tcl_MutexUnlock(&tableMutex); return TCL_OK; @@ -2009,7 +2009,7 @@ Tcl_GetBoolFromObj( if (interp) { TclNewObj(objPtr); TclParseNumber(interp, objPtr, (flags & TCL_NULL_OK) - ? "boolean value or \"\"" : "boolean value", NULL, -1, NULL, 0); + ? "boolean value or \"\"" : "boolean value", NULL, TCL_INDEX_NONE, NULL, 0); Tcl_DecrRefCount(objPtr); } return TCL_ERROR; @@ -2132,7 +2132,7 @@ TclSetBooleanFromAny( TclNewLiteralStringObj(msg, "expected boolean value but got \""); Tcl_AppendLimitedToObj(msg, str, length, 50, ""); - Tcl_AppendToObj(msg, "\"", -1); + Tcl_AppendToObj(msg, "\"", TCL_INDEX_NONE); Tcl_SetObjResult(interp, msg); Tcl_SetErrorCode(interp, "TCL", "VALUE", "BOOLEAN", NULL); } @@ -2421,7 +2421,7 @@ Tcl_GetDoubleFromObj( if (isnan(objPtr->internalRep.doubleValue)) { if (interp != NULL) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "floating point value is Not a Number", -1)); + "floating point value is Not a Number", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "VALUE", "DOUBLE", "NAN", NULL); } @@ -2553,7 +2553,7 @@ Tcl_GetIntFromObj( if (interp != NULL) { const char *s = "integer value too large to represent"; - Tcl_SetObjResult(interp, Tcl_NewStringObj(s, -1)); + Tcl_SetObjResult(interp, Tcl_NewStringObj(s, TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "ARITH", "IOVERFLOW", s, NULL); } return TCL_ERROR; @@ -2718,7 +2718,7 @@ Tcl_GetLongFromObj( #endif if (interp != NULL) { const char *s = "integer value too large to represent"; - Tcl_Obj *msg = Tcl_NewStringObj(s, -1); + Tcl_Obj *msg = Tcl_NewStringObj(s, TCL_INDEX_NONE); Tcl_SetObjResult(interp, msg); Tcl_SetErrorCode(interp, "ARITH", "IOVERFLOW", s, NULL); @@ -2953,7 +2953,7 @@ Tcl_GetWideIntFromObj( } if (interp != NULL) { const char *s = "integer value too large to represent"; - Tcl_Obj *msg = Tcl_NewStringObj(s, -1); + Tcl_Obj *msg = Tcl_NewStringObj(s, TCL_INDEX_NONE); Tcl_SetObjResult(interp, msg); Tcl_SetErrorCode(interp, "ARITH", "IOVERFLOW", s, NULL); @@ -3037,7 +3037,7 @@ Tcl_GetWideUIntFromObj( if (interp != NULL) { const char *s = "integer value too large to represent"; - Tcl_Obj *msg = Tcl_NewStringObj(s, -1); + Tcl_Obj *msg = Tcl_NewStringObj(s, TCL_INDEX_NONE); Tcl_SetObjResult(interp, msg); Tcl_SetErrorCode(interp, "ARITH", "IOVERFLOW", s, NULL); @@ -4539,12 +4539,12 @@ Tcl_RepresentationCmd( } if (objv[1]->bytes) { - Tcl_AppendToObj(descObj, ", string representation \"", -1); + Tcl_AppendToObj(descObj, ", string representation \"", TCL_INDEX_NONE); Tcl_AppendLimitedToObj(descObj, objv[1]->bytes, objv[1]->length, 16, "..."); - Tcl_AppendToObj(descObj, "\"", -1); + Tcl_AppendToObj(descObj, "\"", TCL_INDEX_NONE); } else { - Tcl_AppendToObj(descObj, ", no string representation", -1); + Tcl_AppendToObj(descObj, ", no string representation", TCL_INDEX_NONE); } Tcl_SetObjResult(interp, descObj); diff --git a/generic/tclParse.c b/generic/tclParse.c index 1209a3b..75ffa26 100644 --- a/generic/tclParse.c +++ b/generic/tclParse.c @@ -228,7 +228,7 @@ Tcl_ParseCommand( if ((start == NULL) && (numBytes != 0)) { if (interp != NULL) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "can't parse a NULL pointer", -1)); + "can't parse a NULL pointer", TCL_INDEX_NONE)); } return TCL_ERROR; } @@ -282,13 +282,13 @@ Tcl_ParseCommand( if (src[-1] == '"') { if (interp != NULL) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "extra characters after close-quote", -1)); + "extra characters after close-quote", TCL_INDEX_NONE)); } parsePtr->errorType = TCL_PARSE_QUOTE_EXTRA; } else { if (interp != NULL) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "extra characters after close-brace", -1)); + "extra characters after close-brace", TCL_INDEX_NONE)); } parsePtr->errorType = TCL_PARSE_BRACE_EXTRA; } @@ -1179,7 +1179,7 @@ ParseTokens( if (numBytes == 0) { if (parsePtr->interp != NULL) { Tcl_SetObjResult(parsePtr->interp, Tcl_NewStringObj( - "missing close-bracket", -1)); + "missing close-bracket", TCL_INDEX_NONE)); } parsePtr->errorType = TCL_PARSE_MISSING_BRACKET; parsePtr->term = tokenPtr->start; @@ -1425,7 +1425,7 @@ Tcl_ParseVarName( if (numBytes == 0) { if (parsePtr->interp != NULL) { Tcl_SetObjResult(parsePtr->interp, Tcl_NewStringObj( - "missing close-brace for variable name", -1)); + "missing close-brace for variable name", TCL_INDEX_NONE)); } parsePtr->errorType = TCL_PARSE_MISSING_VAR_BRACE; parsePtr->term = tokenPtr->start-1; @@ -1483,7 +1483,7 @@ Tcl_ParseVarName( if (parsePtr->term == src+numBytes){ if (parsePtr->interp != NULL) { Tcl_SetObjResult(parsePtr->interp, Tcl_NewStringObj( - "missing )", -1)); + "missing )", TCL_INDEX_NONE)); } parsePtr->errorType = TCL_PARSE_MISSING_PAREN; parsePtr->term = src; @@ -1492,7 +1492,7 @@ Tcl_ParseVarName( } else if ((*parsePtr->term != ')')){ if (parsePtr->interp != NULL) { Tcl_SetObjResult(parsePtr->interp, Tcl_NewStringObj( - "invalid character in array index", -1)); + "invalid character in array index", TCL_INDEX_NONE)); } parsePtr->errorType = TCL_PARSE_SYNTAX; parsePtr->term = src; @@ -1558,7 +1558,7 @@ Tcl_ParseVar( int code; Tcl_Parse *parsePtr = (Tcl_Parse *)TclStackAlloc(interp, sizeof(Tcl_Parse)); - if (Tcl_ParseVarName(interp, start, -1, parsePtr, 0) != TCL_OK) { + if (Tcl_ParseVarName(interp, start, TCL_INDEX_NONE, parsePtr, 0) != TCL_OK) { TclStackFree(interp, parsePtr); return NULL; } @@ -1765,7 +1765,7 @@ Tcl_ParseBraces( } Tcl_SetObjResult(parsePtr->interp, Tcl_NewStringObj( - "missing close-brace", -1)); + "missing close-brace", TCL_INDEX_NONE)); /* * Guess if the problem is due to comments by searching the source string @@ -1788,7 +1788,7 @@ Tcl_ParseBraces( case '#' : if (openBrace && TclIsSpaceProcM(src[-1])) { Tcl_AppendToObj(Tcl_GetObjResult(parsePtr->interp), - ": possible unbalanced brace in comment", -1); + ": possible unbalanced brace in comment", TCL_INDEX_NONE); goto error; } break; @@ -1867,7 +1867,7 @@ Tcl_ParseQuotedString( if (*parsePtr->term != '"') { if (parsePtr->interp != NULL) { Tcl_SetObjResult(parsePtr->interp, Tcl_NewStringObj( - "missing \"", -1)); + "missing \"", TCL_INDEX_NONE)); } parsePtr->errorType = TCL_PARSE_MISSING_QUOTE; parsePtr->term = start; diff --git a/generic/tclPipe.c b/generic/tclPipe.c index 137b415..b18b789 100644 --- a/generic/tclPipe.c +++ b/generic/tclPipe.c @@ -335,7 +335,7 @@ TclCleanupChildren( Tcl_Seek(errorChan, 0, SEEK_SET); TclNewObj(objPtr); - count = Tcl_ReadChars(errorChan, objPtr, -1, 0); + count = Tcl_ReadChars(errorChan, objPtr, TCL_INDEX_NONE, 0); if (count == -1) { result = TCL_ERROR; Tcl_DecrRefCount(objPtr); @@ -361,7 +361,7 @@ TclCleanupChildren( if ((abnormalExit != 0) && (anyErrorInfo == 0) && (interp != NULL)) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "child process exited abnormally", -1)); + "child process exited abnormally", TCL_INDEX_NONE)); } return result; } @@ -512,7 +512,7 @@ TclCreatePipeline( if (*p == '\0') { if ((i == (lastBar + 1)) || (i == (argc - 1))) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "illegal use of | or |& in command", -1)); + "illegal use of | or |& in command", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "OPERATION", "EXEC", "PIPESYNTAX", NULL); goto error; @@ -700,7 +700,7 @@ TclCreatePipeline( */ Tcl_SetObjResult(interp, Tcl_NewStringObj( - "illegal use of | or |& in command", -1)); + "illegal use of | or |& in command", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "OPERATION", "EXEC", "PIPESYNTAX", NULL); goto error; @@ -1054,7 +1054,7 @@ Tcl_OpenCommandChannel( if ((flags & TCL_STDOUT) && (outPipe == NULL)) { Tcl_SetObjResult(interp, Tcl_NewStringObj( "can't read output from command:" - " standard output was redirected", -1)); + " standard output was redirected", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "OPERATION", "EXEC", "BADREDIRECT", NULL); goto error; @@ -1062,7 +1062,7 @@ Tcl_OpenCommandChannel( if ((flags & TCL_STDIN) && (inPipe == NULL)) { Tcl_SetObjResult(interp, Tcl_NewStringObj( "can't write input to command:" - " standard input was redirected", -1)); + " standard input was redirected", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "OPERATION", "EXEC", "BADREDIRECT", NULL); goto error; @@ -1074,7 +1074,7 @@ Tcl_OpenCommandChannel( if (channel == NULL) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "pipe for command could not be created", -1)); + "pipe for command could not be created", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "OPERATION", "EXEC", "NOPIPE", NULL); goto error; } diff --git a/generic/tclProcess.c b/generic/tclProcess.c index 075877e..0dad7c4 100644 --- a/generic/tclProcess.c +++ b/generic/tclProcess.c @@ -233,9 +233,9 @@ WaitProcessStatus( if (msgObjPtr) *msgObjPtr = Tcl_ObjPrintf( "error waiting for process to exit: %s", msg); if (errorObjPtr) { - errorStrings[0] = Tcl_NewStringObj("POSIX", -1); - errorStrings[1] = Tcl_NewStringObj(Tcl_ErrnoId(), -1); - errorStrings[2] = Tcl_NewStringObj(msg, -1); + errorStrings[0] = Tcl_NewStringObj("POSIX", TCL_INDEX_NONE); + errorStrings[1] = Tcl_NewStringObj(Tcl_ErrnoId(), TCL_INDEX_NONE); + errorStrings[2] = Tcl_NewStringObj(msg, TCL_INDEX_NONE); *errorObjPtr = Tcl_NewListObj(3, errorStrings); } return TCL_PROCESS_ERROR; @@ -256,9 +256,9 @@ WaitProcessStatus( */ if (msgObjPtr) *msgObjPtr = Tcl_NewStringObj( - "child process exited abnormally", -1); + "child process exited abnormally", TCL_INDEX_NONE); if (errorObjPtr) { - errorStrings[0] = Tcl_NewStringObj("CHILDSTATUS", -1); + errorStrings[0] = Tcl_NewStringObj("CHILDSTATUS", TCL_INDEX_NONE); TclNewIntObj(errorStrings[1], resolvedPid); TclNewIntObj(errorStrings[2], WEXITSTATUS(waitStatus)); *errorObjPtr = Tcl_NewListObj(3, errorStrings); @@ -277,10 +277,10 @@ WaitProcessStatus( if (msgObjPtr) *msgObjPtr = Tcl_ObjPrintf( "child killed: %s", msg); if (errorObjPtr) { - errorStrings[0] = Tcl_NewStringObj("CHILDKILLED", -1); + errorStrings[0] = Tcl_NewStringObj("CHILDKILLED", TCL_INDEX_NONE); TclNewIntObj(errorStrings[1], resolvedPid); - errorStrings[2] = Tcl_NewStringObj(Tcl_SignalId(WTERMSIG(waitStatus)), -1); - errorStrings[3] = Tcl_NewStringObj(msg, -1); + errorStrings[2] = Tcl_NewStringObj(Tcl_SignalId(WTERMSIG(waitStatus)), TCL_INDEX_NONE); + errorStrings[3] = Tcl_NewStringObj(msg, TCL_INDEX_NONE); *errorObjPtr = Tcl_NewListObj(4, errorStrings); } return TCL_PROCESS_SIGNALED; @@ -296,10 +296,10 @@ WaitProcessStatus( if (msgObjPtr) *msgObjPtr = Tcl_ObjPrintf( "child suspended: %s", msg); if (errorObjPtr) { - errorStrings[0] = Tcl_NewStringObj("CHILDSUSP", -1); + errorStrings[0] = Tcl_NewStringObj("CHILDSUSP", TCL_INDEX_NONE); TclNewIntObj(errorStrings[1], resolvedPid); - errorStrings[2] = Tcl_NewStringObj(Tcl_SignalId(WSTOPSIG(waitStatus)), -1); - errorStrings[3] = Tcl_NewStringObj(msg, -1); + errorStrings[2] = Tcl_NewStringObj(Tcl_SignalId(WSTOPSIG(waitStatus)), TCL_INDEX_NONE); + errorStrings[3] = Tcl_NewStringObj(msg, TCL_INDEX_NONE); *errorObjPtr = Tcl_NewListObj(4, errorStrings); } return TCL_PROCESS_STOPPED; @@ -312,12 +312,12 @@ WaitProcessStatus( if (codePtr) *codePtr = waitStatus; if (msgObjPtr) *msgObjPtr = Tcl_NewStringObj( - "child wait status didn't make sense\n", -1); + "child wait status didn't make sense\n", TCL_INDEX_NONE); if (errorObjPtr) { - errorStrings[0] = Tcl_NewStringObj("TCL", -1); - errorStrings[1] = Tcl_NewStringObj("OPERATION", -1); - errorStrings[2] = Tcl_NewStringObj("EXEC", -1); - errorStrings[3] = Tcl_NewStringObj("ODDWAITRESULT", -1); + errorStrings[0] = Tcl_NewStringObj("TCL", TCL_INDEX_NONE); + errorStrings[1] = Tcl_NewStringObj("OPERATION", TCL_INDEX_NONE); + errorStrings[2] = Tcl_NewStringObj("EXEC", TCL_INDEX_NONE); + errorStrings[3] = Tcl_NewStringObj("ODDWAITRESULT", TCL_INDEX_NONE); TclNewIntObj(errorStrings[4], resolvedPid); *errorObjPtr = Tcl_NewListObj(5, errorStrings); } diff --git a/generic/tclScan.c b/generic/tclScan.c index ee18174..6a5bfb7 100644 --- a/generic/tclScan.c +++ b/generic/tclScan.c @@ -397,9 +397,9 @@ ValidateFormat( invalidFieldSize: buf[Tcl_UniCharToUtf(ch, buf)] = '\0'; errorMsg = Tcl_NewStringObj( - "field size modifier may not be specified in %", -1); - Tcl_AppendToObj(errorMsg, buf, -1); - Tcl_AppendToObj(errorMsg, " conversion", -1); + "field size modifier may not be specified in %", TCL_INDEX_NONE); + Tcl_AppendToObj(errorMsg, buf, TCL_INDEX_NONE); + Tcl_AppendToObj(errorMsg, " conversion", TCL_INDEX_NONE); Tcl_SetObjResult(interp, errorMsg); Tcl_SetErrorCode(interp, "TCL", "FORMAT", "BADSIZE", NULL); goto error; @@ -452,15 +452,15 @@ ValidateFormat( break; badSet: Tcl_SetObjResult(interp, Tcl_NewStringObj( - "unmatched [ in format string", -1)); + "unmatched [ in format string", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "FORMAT", "BRACKET", NULL); goto error; default: buf[Tcl_UniCharToUtf(ch, buf)] = '\0'; errorMsg = Tcl_NewStringObj( - "bad scan conversion character \"", -1); - Tcl_AppendToObj(errorMsg, buf, -1); - Tcl_AppendToObj(errorMsg, "\"", -1); + "bad scan conversion character \"", TCL_INDEX_NONE); + Tcl_AppendToObj(errorMsg, buf, TCL_INDEX_NONE); + Tcl_AppendToObj(errorMsg, "\"", TCL_INDEX_NONE); Tcl_SetObjResult(interp, errorMsg); Tcl_SetErrorCode(interp, "TCL", "FORMAT", "BADTYPE", NULL); goto error; @@ -531,7 +531,7 @@ ValidateFormat( badIndex: if (gotXpg) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "\"%n$\" argument index out of range", -1)); + "\"%n$\" argument index out of range", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "FORMAT", "INDEXRANGE", NULL); } else { Tcl_SetObjResult(interp, Tcl_NewStringObj( @@ -926,7 +926,7 @@ Tcl_ScanObjCmd( mp_int big; if (mp_init_u64(&big, (Tcl_WideUInt)wideValue) != MP_OKAY) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "insufficient memory to create bignum", -1)); + "insufficient memory to create bignum", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "MEMORY", NULL); return TCL_ERROR; } else { @@ -953,7 +953,7 @@ Tcl_ScanObjCmd( } Tcl_DecrRefCount(objPtr); Tcl_SetObjResult(interp, Tcl_NewStringObj( - "unsigned bignum scans are invalid", -1)); + "unsigned bignum scans are invalid", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "FORMAT", "BADUNSIGNED",NULL); return TCL_ERROR; @@ -972,7 +972,7 @@ Tcl_ScanObjCmd( mp_int big; if (mp_init_u64(&big, (unsigned long)value) != MP_OKAY) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "insufficient memory to create bignum", -1)); + "insufficient memory to create bignum", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "MEMORY", NULL); return TCL_ERROR; } else { diff --git a/generic/tclVar.c b/generic/tclVar.c index f7ec7c8..bc94e73 100644 --- a/generic/tclVar.c +++ b/generic/tclVar.c @@ -301,7 +301,7 @@ TclVarHashCreateVar( Tcl_Obj *keyPtr; Var *varPtr; - keyPtr = Tcl_NewStringObj(key, -1); + keyPtr = Tcl_NewStringObj(key, TCL_INDEX_NONE); Tcl_IncrRefCount(keyPtr); varPtr = VarHashCreateVar(tablePtr, keyPtr, newPtr); Tcl_DecrRefCount(keyPtr); @@ -469,7 +469,7 @@ TclLookupVar( * is set to NULL. */ { Var *varPtr; - Tcl_Obj *part1Ptr = Tcl_NewStringObj(part1, -1); + Tcl_Obj *part1Ptr = Tcl_NewStringObj(part1, TCL_INDEX_NONE); if (createPart1) { Tcl_IncrRefCount(part1Ptr); @@ -551,7 +551,7 @@ TclObjLookupVar( Var *resPtr; if (part2) { - part2Ptr = Tcl_NewStringObj(part2, -1); + part2Ptr = Tcl_NewStringObj(part2, TCL_INDEX_NONE); if (createPart2) { Tcl_IncrRefCount(part2Ptr); } @@ -949,7 +949,7 @@ TclLookupSimpleVar( return NULL; } if (tail != varName) { - tailPtr = Tcl_NewStringObj(tail, -1); + tailPtr = Tcl_NewStringObj(tail, TCL_INDEX_NONE); } else { tailPtr = varNamePtr; } @@ -1173,10 +1173,10 @@ Tcl_GetVar2( * bits. */ { Tcl_Obj *resultPtr; - Tcl_Obj *part2Ptr = NULL, *part1Ptr = Tcl_NewStringObj(part1, -1); + Tcl_Obj *part2Ptr = NULL, *part1Ptr = Tcl_NewStringObj(part1, TCL_INDEX_NONE); if (part2) { - part2Ptr = Tcl_NewStringObj(part2, -1); + part2Ptr = Tcl_NewStringObj(part2, TCL_INDEX_NONE); Tcl_IncrRefCount(part2Ptr); } @@ -1226,10 +1226,10 @@ Tcl_GetVar2Ex( int flags) /* OR-ed combination of TCL_GLOBAL_ONLY, and * TCL_LEAVE_ERR_MSG bits. */ { - Tcl_Obj *resPtr, *part2Ptr = NULL, *part1Ptr = Tcl_NewStringObj(part1, -1); + Tcl_Obj *resPtr, *part2Ptr = NULL, *part1Ptr = Tcl_NewStringObj(part1, TCL_INDEX_NONE); if (part2) { - part2Ptr = Tcl_NewStringObj(part2, -1); + part2Ptr = Tcl_NewStringObj(part2, TCL_INDEX_NONE); Tcl_IncrRefCount(part2Ptr); } @@ -1547,7 +1547,7 @@ Tcl_SetVar2( * TCL_LEAVE_ERR_MSG. */ { Tcl_Obj *varValuePtr = Tcl_SetVar2Ex(interp, part1, part2, - Tcl_NewStringObj(newValue, -1), flags); + Tcl_NewStringObj(newValue, TCL_INDEX_NONE), flags); if (varValuePtr == NULL) { return NULL; @@ -1607,11 +1607,11 @@ Tcl_SetVar2Ex( * TCL_APPEND_VALUE, TCL_LIST_ELEMENT or * TCL_LEAVE_ERR_MSG. */ { - Tcl_Obj *resPtr, *part2Ptr = NULL, *part1Ptr = Tcl_NewStringObj(part1, -1); + Tcl_Obj *resPtr, *part2Ptr = NULL, *part1Ptr = Tcl_NewStringObj(part1, TCL_INDEX_NONE); Tcl_IncrRefCount(part1Ptr); if (part2) { - part2Ptr = Tcl_NewStringObj(part2, -1); + part2Ptr = Tcl_NewStringObj(part2, TCL_INDEX_NONE); Tcl_IncrRefCount(part2Ptr); } @@ -2291,10 +2291,10 @@ Tcl_UnsetVar2( * TCL_LEAVE_ERR_MSG. */ { int result; - Tcl_Obj *part2Ptr = NULL, *part1Ptr = Tcl_NewStringObj(part1, -1); + Tcl_Obj *part2Ptr = NULL, *part1Ptr = Tcl_NewStringObj(part1, TCL_INDEX_NONE); if (part2) { - part2Ptr = Tcl_NewStringObj(part2, -1); + part2Ptr = Tcl_NewStringObj(part2, TCL_INDEX_NONE); } /* @@ -3070,7 +3070,7 @@ ArrayForNRCmd( if (numVars != 2) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "must have two variable names", -1)); + "must have two variable names", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "SYNTAX", "array", "for", NULL); return TCL_ERROR; } @@ -3168,7 +3168,7 @@ ArrayForLoopCallback( Tcl_ResetResult(interp); if (done == TCL_ERROR) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "array changed during iteration", -1)); + "array changed during iteration", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "READ", "array", "for", NULL); varPtr->flags |= TCL_LEAVE_ERR_MSG; result = done; @@ -4048,7 +4048,7 @@ ArraySetCmd( } if (elemLen & 1) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "list must have an even number of elements", -1)); + "list must have an even number of elements", TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "ARGUMENT", "FORMAT", NULL); return TCL_ERROR; } @@ -4218,10 +4218,10 @@ ArrayStatsCmd( stats = Tcl_HashStats((Tcl_HashTable *) varPtr->value.tablePtr); if (stats == NULL) { Tcl_SetObjResult(interp, Tcl_NewStringObj( - "error reading array statistics", -1)); + "error reading array statistics", TCL_INDEX_NONE)); return TCL_ERROR; } - Tcl_SetObjResult(interp, Tcl_NewStringObj(stats, -1)); + Tcl_SetObjResult(interp, Tcl_NewStringObj(stats, TCL_INDEX_NONE)); Tcl_Free(stats); return TCL_OK; } diff --git a/unix/dltest/pkgb.c b/unix/dltest/pkgb.c index 651c132..750d270 100644 --- a/unix/dltest/pkgb.c +++ b/unix/dltest/pkgb.c @@ -84,7 +84,7 @@ Pkgb_UnsafeObjCmd( (void)objc; (void)objv; - return Tcl_EvalEx(interp, "list unsafe command invoked", -1, TCL_EVAL_GLOBAL); + return Tcl_EvalEx(interp, "list unsafe command invoked", TCL_INDEX_NONE, TCL_EVAL_GLOBAL); } static int diff --git a/unix/dltest/pkgc.c b/unix/dltest/pkgc.c index 8e9c829..582d457 100644 --- a/unix/dltest/pkgc.c +++ b/unix/dltest/pkgc.c @@ -81,7 +81,7 @@ Pkgc_UnsafeObjCmd( (void)objc; (void)objv; - Tcl_SetObjResult(interp, Tcl_NewStringObj("unsafe command invoked", -1)); + Tcl_SetObjResult(interp, Tcl_NewStringObj("unsafe command invoked", TCL_INDEX_NONE)); return TCL_OK; } diff --git a/unix/dltest/pkgd.c b/unix/dltest/pkgd.c index 1b97d4c..52ba968 100644 --- a/unix/dltest/pkgd.c +++ b/unix/dltest/pkgd.c @@ -81,7 +81,7 @@ Pkgd_UnsafeObjCmd( (void)objc; (void)objv; - Tcl_SetObjResult(interp, Tcl_NewStringObj("unsafe command invoked", -1)); + Tcl_SetObjResult(interp, Tcl_NewStringObj("unsafe command invoked", TCL_INDEX_NONE)); return TCL_OK; } diff --git a/unix/dltest/pkge.c b/unix/dltest/pkge.c index 26a4b79..5f0db9b 100644 --- a/unix/dltest/pkge.c +++ b/unix/dltest/pkge.c @@ -41,5 +41,5 @@ Pkge_Init( if (Tcl_InitStubs(interp, "8.5-", 0) == NULL) { return TCL_ERROR; } - return Tcl_EvalEx(interp, script, -1, 0); + return Tcl_EvalEx(interp, script, TCL_INDEX_NONE, 0); } diff --git a/unix/tclUnixFCmd.c b/unix/tclUnixFCmd.c index b205061..c9d7c45 100644 --- a/unix/tclUnixFCmd.c +++ b/unix/tclUnixFCmd.c @@ -1648,7 +1648,7 @@ SetPermissionsAttribute( Tcl_Obj *modeObj; TclNewLiteralStringObj(modeObj, "0o"); - Tcl_AppendToObj(modeObj, modeStringPtr+scanned+1, -1); + Tcl_AppendToObj(modeObj, modeStringPtr+scanned+1, TCL_INDEX_NONE); result = Tcl_GetWideIntFromObj(NULL, modeObj, &mode); Tcl_DecrRefCount(modeObj); } -- cgit v0.12 From 9d2cc36a0e82c13737990341fdb1bb9cb8fa68ca Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Sun, 5 Mar 2023 21:09:46 +0000 Subject: Fix [57bfcf43dd]: Remove unreachable code in Tcl_SetWideIntObj() --- generic/tclObj.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/generic/tclObj.c b/generic/tclObj.c index 531a256..a6e7698 100644 --- a/generic/tclObj.c +++ b/generic/tclObj.c @@ -3040,19 +3040,13 @@ Tcl_SetWideIntObj( Tcl_Panic("%s called with shared object", "Tcl_SetWideIntObj"); } - if ((wideValue >= (Tcl_WideInt) LONG_MIN) - && (wideValue <= (Tcl_WideInt) LONG_MAX)) { - TclSetLongObj(objPtr, (long) wideValue); - } else { #ifndef TCL_WIDE_INT_IS_LONG + if ((wideValue < (Tcl_WideInt) LONG_MIN) + || (wideValue > (Tcl_WideInt) LONG_MAX)) { TclSetWideIntObj(objPtr, wideValue); -#else - mp_int big; - - TclBNInitBignumFromWideInt(&big, wideValue); - Tcl_SetBignumObj(objPtr, &big); + } else #endif - } + TclSetLongObj(objPtr, (long) wideValue); } /* -- cgit v0.12 From 40e214cd76ab0f9fe274bb7a27b56a40194254f7 Mon Sep 17 00:00:00 2001 From: pooryorick Date: Mon, 6 Mar 2023 06:58:13 +0000 Subject: Add new valgrind suppression items. --- tools/valgrind_suppress | 137 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 137 insertions(+) diff --git a/tools/valgrind_suppress b/tools/valgrind_suppress index fb7f173..11ca880 100644 --- a/tools/valgrind_suppress +++ b/tools/valgrind_suppress @@ -1,3 +1,17 @@ +#{ +# Tcl_GetChannelOption/TcpGetOptionProc/TcphostPortList/getnameinfo/gethostbyaddr_r +# Memcheck:Leak +# match-leak-kinds: reachable +# fun:malloc +# fun:strdup +# ... +# fun:module_load +# ... +# fun:getnameinfo +# ... +# fun:Tcl_GetChannelOption +#} + { TclCreatesocketAddress/getaddrinfo/calloc Memcheck:Leak @@ -11,6 +25,16 @@ { TclCreatesocketAddress/getaddrinfo/malloc Memcheck:Leak + match-leak-kinds: definite + fun:malloc + ... + fun:getaddrinfo + fun:TclCreateSocketAddress +} + +{ + TclCreatesocketAddress/getaddrinfo/malloc + Memcheck:Leak match-leak-kinds: reachable fun:malloc ... @@ -19,6 +43,18 @@ } { + TclpDlopen/decompose_rpath + Memcheck:Leak + match-leak-kinds: reachable + fun:malloc + fun:decompose_rpath + ... + fun:dlopen_doit + ... + fun:TclpDlopen +} + +{ TclpDlopen/load Memcheck:Leak match-leak-kinds: reachable @@ -72,6 +108,46 @@ } { + TclpGeHostByName/gethostbyname_r/strdup/malloc + Memcheck:Leak + match-leak-kinds: reachable + fun:malloc + fun:strdup + ... + fun:dl_open_worker + ... + fun:do_dlopen + ... + fun:TclpGetHostByName +} + +{ + TclpGeHostByName/gethostbyname_r/calloc + Memcheck:Leak + match-leak-kinds: reachable + fun:calloc + ... + fun:dl_open_worker + ... + fun:do_dlopen + ... + fun:TclpGetHostByName +} + +{ + TclpGeHostByName/gethostbyname_r/malloc + Memcheck:Leak + match-leak-kinds: reachable + fun:malloc + ... + fun:dl_open_worker + ... + fun:do_dlopen + ... + fun:TclpGetHostByName +} + +{ TclpGetPwNam/getpwname_r/__nss_next2/calloc Memcheck:Leak match-leak-kinds: reachable @@ -105,6 +181,57 @@ } { + TclpGetGrGid/getgrgid_r/module_load + Memcheck:Leak + match-leak-kinds: reachable + fun:calloc + ... + fun:module_load + ... + fun:TclpGetGrGid +} + +{ + TclpGetGrGid/getgrgid_r/module_load + Memcheck:Leak + match-leak-kinds: reachable + fun:malloc + ... + fun:module_load + ... + fun:TclpGetGrGid +} + +{ + TcphostPortList/getnameinfo/module_load/calloc + Memcheck:Leak + match-leak-kinds: definite,reachable + fun:calloc + ... + fun:dl_open_worker_begin + ... + fun:module_load + ... + fun:getnameinfo + fun:TcpHostPortList +} + +{ + # see sourceware glibc Bug 14984 - getnameinfo() might be leaking memory + TcphostPortList/getnameinfo/module_load/mallco + Memcheck:Leak + match-leak-kinds: definite,reachable + fun:malloc + ... + fun:dl_open_worker_begin + ... + fun:module_load + ... + fun:getnameinfo + fun:TcpHostPortList +} + +{ TclpThreadExit/pthread_exit/calloc Memcheck:Leak match-leak-kinds: reachable @@ -124,3 +251,13 @@ fun:TclpThreadExit } +{ + TclpThreadExit/pthread_exit/malloc + Memcheck:Leak + match-leak-kinds: definite + fun:malloc + ... + fun:pthread_exit + fun:TclpThreadExit +} + -- cgit v0.12 From fa795b478ac557afbf6511559553e279a046862a Mon Sep 17 00:00:00 2001 From: pooryorick Date: Mon, 6 Mar 2023 06:59:52 +0000 Subject: Add new valgrind suppression items. --- tools/valgrind_suppress | 137 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 137 insertions(+) diff --git a/tools/valgrind_suppress b/tools/valgrind_suppress index fb7f173..11ca880 100644 --- a/tools/valgrind_suppress +++ b/tools/valgrind_suppress @@ -1,3 +1,17 @@ +#{ +# Tcl_GetChannelOption/TcpGetOptionProc/TcphostPortList/getnameinfo/gethostbyaddr_r +# Memcheck:Leak +# match-leak-kinds: reachable +# fun:malloc +# fun:strdup +# ... +# fun:module_load +# ... +# fun:getnameinfo +# ... +# fun:Tcl_GetChannelOption +#} + { TclCreatesocketAddress/getaddrinfo/calloc Memcheck:Leak @@ -11,6 +25,16 @@ { TclCreatesocketAddress/getaddrinfo/malloc Memcheck:Leak + match-leak-kinds: definite + fun:malloc + ... + fun:getaddrinfo + fun:TclCreateSocketAddress +} + +{ + TclCreatesocketAddress/getaddrinfo/malloc + Memcheck:Leak match-leak-kinds: reachable fun:malloc ... @@ -19,6 +43,18 @@ } { + TclpDlopen/decompose_rpath + Memcheck:Leak + match-leak-kinds: reachable + fun:malloc + fun:decompose_rpath + ... + fun:dlopen_doit + ... + fun:TclpDlopen +} + +{ TclpDlopen/load Memcheck:Leak match-leak-kinds: reachable @@ -72,6 +108,46 @@ } { + TclpGeHostByName/gethostbyname_r/strdup/malloc + Memcheck:Leak + match-leak-kinds: reachable + fun:malloc + fun:strdup + ... + fun:dl_open_worker + ... + fun:do_dlopen + ... + fun:TclpGetHostByName +} + +{ + TclpGeHostByName/gethostbyname_r/calloc + Memcheck:Leak + match-leak-kinds: reachable + fun:calloc + ... + fun:dl_open_worker + ... + fun:do_dlopen + ... + fun:TclpGetHostByName +} + +{ + TclpGeHostByName/gethostbyname_r/malloc + Memcheck:Leak + match-leak-kinds: reachable + fun:malloc + ... + fun:dl_open_worker + ... + fun:do_dlopen + ... + fun:TclpGetHostByName +} + +{ TclpGetPwNam/getpwname_r/__nss_next2/calloc Memcheck:Leak match-leak-kinds: reachable @@ -105,6 +181,57 @@ } { + TclpGetGrGid/getgrgid_r/module_load + Memcheck:Leak + match-leak-kinds: reachable + fun:calloc + ... + fun:module_load + ... + fun:TclpGetGrGid +} + +{ + TclpGetGrGid/getgrgid_r/module_load + Memcheck:Leak + match-leak-kinds: reachable + fun:malloc + ... + fun:module_load + ... + fun:TclpGetGrGid +} + +{ + TcphostPortList/getnameinfo/module_load/calloc + Memcheck:Leak + match-leak-kinds: definite,reachable + fun:calloc + ... + fun:dl_open_worker_begin + ... + fun:module_load + ... + fun:getnameinfo + fun:TcpHostPortList +} + +{ + # see sourceware glibc Bug 14984 - getnameinfo() might be leaking memory + TcphostPortList/getnameinfo/module_load/mallco + Memcheck:Leak + match-leak-kinds: definite,reachable + fun:malloc + ... + fun:dl_open_worker_begin + ... + fun:module_load + ... + fun:getnameinfo + fun:TcpHostPortList +} + +{ TclpThreadExit/pthread_exit/calloc Memcheck:Leak match-leak-kinds: reachable @@ -124,3 +251,13 @@ fun:TclpThreadExit } +{ + TclpThreadExit/pthread_exit/malloc + Memcheck:Leak + match-leak-kinds: definite + fun:malloc + ... + fun:pthread_exit + fun:TclpThreadExit +} + -- cgit v0.12 From 296e4767eaa58abc7f46c676e80546de26a997a2 Mon Sep 17 00:00:00 2001 From: pooryorick Date: Mon, 6 Mar 2023 07:00:48 +0000 Subject: Add new valgrind suppression items. --- tools/valgrind_suppress | 137 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 137 insertions(+) diff --git a/tools/valgrind_suppress b/tools/valgrind_suppress index fb7f173..11ca880 100644 --- a/tools/valgrind_suppress +++ b/tools/valgrind_suppress @@ -1,3 +1,17 @@ +#{ +# Tcl_GetChannelOption/TcpGetOptionProc/TcphostPortList/getnameinfo/gethostbyaddr_r +# Memcheck:Leak +# match-leak-kinds: reachable +# fun:malloc +# fun:strdup +# ... +# fun:module_load +# ... +# fun:getnameinfo +# ... +# fun:Tcl_GetChannelOption +#} + { TclCreatesocketAddress/getaddrinfo/calloc Memcheck:Leak @@ -11,6 +25,16 @@ { TclCreatesocketAddress/getaddrinfo/malloc Memcheck:Leak + match-leak-kinds: definite + fun:malloc + ... + fun:getaddrinfo + fun:TclCreateSocketAddress +} + +{ + TclCreatesocketAddress/getaddrinfo/malloc + Memcheck:Leak match-leak-kinds: reachable fun:malloc ... @@ -19,6 +43,18 @@ } { + TclpDlopen/decompose_rpath + Memcheck:Leak + match-leak-kinds: reachable + fun:malloc + fun:decompose_rpath + ... + fun:dlopen_doit + ... + fun:TclpDlopen +} + +{ TclpDlopen/load Memcheck:Leak match-leak-kinds: reachable @@ -72,6 +108,46 @@ } { + TclpGeHostByName/gethostbyname_r/strdup/malloc + Memcheck:Leak + match-leak-kinds: reachable + fun:malloc + fun:strdup + ... + fun:dl_open_worker + ... + fun:do_dlopen + ... + fun:TclpGetHostByName +} + +{ + TclpGeHostByName/gethostbyname_r/calloc + Memcheck:Leak + match-leak-kinds: reachable + fun:calloc + ... + fun:dl_open_worker + ... + fun:do_dlopen + ... + fun:TclpGetHostByName +} + +{ + TclpGeHostByName/gethostbyname_r/malloc + Memcheck:Leak + match-leak-kinds: reachable + fun:malloc + ... + fun:dl_open_worker + ... + fun:do_dlopen + ... + fun:TclpGetHostByName +} + +{ TclpGetPwNam/getpwname_r/__nss_next2/calloc Memcheck:Leak match-leak-kinds: reachable @@ -105,6 +181,57 @@ } { + TclpGetGrGid/getgrgid_r/module_load + Memcheck:Leak + match-leak-kinds: reachable + fun:calloc + ... + fun:module_load + ... + fun:TclpGetGrGid +} + +{ + TclpGetGrGid/getgrgid_r/module_load + Memcheck:Leak + match-leak-kinds: reachable + fun:malloc + ... + fun:module_load + ... + fun:TclpGetGrGid +} + +{ + TcphostPortList/getnameinfo/module_load/calloc + Memcheck:Leak + match-leak-kinds: definite,reachable + fun:calloc + ... + fun:dl_open_worker_begin + ... + fun:module_load + ... + fun:getnameinfo + fun:TcpHostPortList +} + +{ + # see sourceware glibc Bug 14984 - getnameinfo() might be leaking memory + TcphostPortList/getnameinfo/module_load/mallco + Memcheck:Leak + match-leak-kinds: definite,reachable + fun:malloc + ... + fun:dl_open_worker_begin + ... + fun:module_load + ... + fun:getnameinfo + fun:TcpHostPortList +} + +{ TclpThreadExit/pthread_exit/calloc Memcheck:Leak match-leak-kinds: reachable @@ -124,3 +251,13 @@ fun:TclpThreadExit } +{ + TclpThreadExit/pthread_exit/malloc + Memcheck:Leak + match-leak-kinds: definite + fun:malloc + ... + fun:pthread_exit + fun:TclpThreadExit +} + -- cgit v0.12 From cfa443421bcf235f75def81bc137774aa0f20387 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Mon, 6 Mar 2023 10:24:15 +0000 Subject: Tcl_WinTCharToUtf() is deprecated, so use Tcl_WCharToUtfDString() in stead. --- win/tclWinFile.c | 34 ++++++++++++++++------------------ 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/win/tclWinFile.c b/win/tclWinFile.c index a54077d..c7159b7 100644 --- a/win/tclWinFile.c +++ b/win/tclWinFile.c @@ -1476,24 +1476,22 @@ TclpGetUserHome( */ ptr = TclpGetUserName(&ds); if (ptr != NULL && strcasecmp(name, ptr) == 0) { - HANDLE hProcess; - WCHAR buf[MAX_PATH]; - DWORD nChars = sizeof(buf) / sizeof(buf[0]); - /* Sadly GetCurrentProcessToken not in Win 7 so slightly longer */ - hProcess = GetCurrentProcess(); /* Need not be closed */ - if (hProcess) { - HANDLE hToken; - if (OpenProcessToken(hProcess, TOKEN_QUERY, &hToken)) { - if (GetUserProfileDirectoryW(hToken, buf, &nChars)) { - Tcl_WinTCharToUtf((TCHAR *)buf, - (nChars-1)*sizeof(WCHAR), - bufferPtr); - result = Tcl_DStringValue(bufferPtr); - rc = 1; - } - CloseHandle(hToken); - } - } + HANDLE hProcess; + WCHAR buf[MAX_PATH]; + DWORD nChars = sizeof(buf) / sizeof(buf[0]); + /* Sadly GetCurrentProcessToken not in Win 7 so slightly longer */ + hProcess = GetCurrentProcess(); /* Need not be closed */ + if (hProcess) { + HANDLE hToken; + if (OpenProcessToken(hProcess, TOKEN_QUERY, &hToken)) { + if (GetUserProfileDirectoryW(hToken, buf, &nChars)) { + Tcl_DStringInit(bufferPtr); + result = Tcl_WCharToUtfDString(buf, nChars-1, (bufferPtr)); + rc = 1; + } + CloseHandle(hToken); + } + } } Tcl_DStringFree(&ds); } else { -- cgit v0.12 From 3eb68691b82d5c02de6081180225f886b140926c Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Mon, 6 Mar 2023 10:30:41 +0000 Subject: ckfree() -> Tcl_Free() --- generic/tclTest.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/generic/tclTest.c b/generic/tclTest.c index 6399e37..06d5064 100644 --- a/generic/tclTest.c +++ b/generic/tclTest.c @@ -1197,7 +1197,7 @@ CmdDelProc0( } prevRefPtr = thisRefPtr; } - ckfree(refPtr); + Tcl_Free(refPtr); } static void -- cgit v0.12 From 5e095a3a4d445694e0a618ed20fe92d8fd34b637 Mon Sep 17 00:00:00 2001 From: dgp Date: Mon, 6 Mar 2023 18:17:19 +0000 Subject: [b4af93cd9f] Proposed fix from apnadkarni. It works! --- unix/tclUnixSock.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/unix/tclUnixSock.c b/unix/tclUnixSock.c index 70dfc61..0be10ad 100644 --- a/unix/tclUnixSock.c +++ b/unix/tclUnixSock.c @@ -1033,10 +1033,10 @@ TcpGetOptionProc( if ((len == 0) || ((len > 1) && (optionName[1] == 'k') && (strncmp(optionName, "-keepalive", len) == 0))) { + int opt = 0; #if defined(SO_KEEPALIVE) - socklen_t size; + socklen_t size = sizeof(opt); #endif - int opt = 0; if (len == 0) { Tcl_DStringAppendElement(dsPtr, "-keepalive"); @@ -1053,10 +1053,10 @@ TcpGetOptionProc( if ((len == 0) || ((len > 1) && (optionName[1] == 'n') && (strncmp(optionName, "-nodelay", len) == 0))) { + int opt = 0; #if defined(SOL_TCP) && defined(TCP_NODELAY) - socklen_t size; + socklen_t size = sizeof(opt); #endif - int opt = 0; if (len == 0) { Tcl_DStringAppendElement(dsPtr, "-nodelay"); -- cgit v0.12 From f4450abcf989ed7ce06a977c8c12483762f00512 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Mon, 6 Mar 2023 19:58:53 +0000 Subject: Proposed fix for [f3cb2a32d6]: uninitialized value in format-2.18 --- generic/tclStringObj.c | 1 + 1 file changed, 1 insertion(+) diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c index 723d2e5..328e410 100644 --- a/generic/tclStringObj.c +++ b/generic/tclStringObj.c @@ -4849,6 +4849,7 @@ ExtendStringRepWithUnicode( copyBytes: dst = objPtr->bytes + origLength; + *dst = '\0'; for (i = 0; i < numChars; i++) { dst += Tcl_UniCharToUtf(unicode[i], dst); } -- cgit v0.12 From f5ba8a8478a966af91228ad54eb264c04c21b11d Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Mon, 6 Mar 2023 21:01:45 +0000 Subject: Proposed fix for [95e287b956]: uninit value use in stringObj-4.2 --- tests/stringObj.test | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/stringObj.test b/tests/stringObj.test index dce932b..da379ba 100644 --- a/tests/stringObj.test +++ b/tests/stringObj.test @@ -66,8 +66,8 @@ test stringObj-4.2 {Tcl_SetObjLength procedure, string gets longer} testobj { testobj freeallvars teststringobj set 1 abcdef teststringobj setlength 1 10 - list [teststringobj length 1] [teststringobj length2 1] -} {10 10} + list [teststringobj length 1] +} 10 test stringObj-4.3 {Tcl_SetObjLength procedure, string gets longer} testobj { testobj freeallvars teststringobj set 1 abcdef -- cgit v0.12 -- cgit v0.12 From 1f6cec5ff3943450001a29bea3371dea9f23db7f Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Tue, 7 Mar 2023 02:52:26 +0000 Subject: Fix testchmod and associated tests that always failed on Windows --- tests/fCmd.test | 18 ++- tests/tcltest.test | 2 +- tests/winFCmd.test | 112 ++++++++++--------- win/tclWinTest.c | 316 ++++++++++++++++++++++++++++------------------------- 4 files changed, 239 insertions(+), 209 deletions(-) diff --git a/tests/fCmd.test b/tests/fCmd.test index dad1af9..ecb1d04 100644 --- a/tests/fCmd.test +++ b/tests/fCmd.test @@ -1065,6 +1065,7 @@ test fCmd-10.3.1 {file copy: comprehensive: dir to new name} -setup { file mkdir [file join td1 tdx] file mkdir [file join td2 tdy] testchmod 0o555 td2 + testchmod 0o555 td2/tdy; # Above line removes inherited perms. So restore. file copy td1 td3 file copy td2 td4 list [lsort [glob td*]] [glob -directory td3 t*] \ @@ -1086,10 +1087,19 @@ test fCmd-10.4 {file copy: comprehensive: file to existing file} -setup { createfile tfd2 createfile tfd3 createfile tfd4 - testchmod 0o444 tfs3 - testchmod 0o444 tfs4 - testchmod 0o444 tfd2 - testchmod 0o444 tfd4 + if {$::tcl_platform(platform) eq "windows"} { + # On Windows testchmode will attach an ACL which file copy cannot handle + # so use good old attributes which file copy does understand + file attribute tfs3 -readonly 1 + file attribute tfs4 -readonly 1 + file attribute tfd2 -readonly 1 + file attribute tfd4 -readonly 1 + } else { + testchmod 0o444 tfs3 + testchmod 0o444 tfs4 + testchmod 0o444 tfd2 + testchmod 0o444 tfd4 + } set msg [list [catch {file copy tf1 tf2} msg] $msg] file copy -force tfs1 tfd1 file copy -force tfs2 tfd2 diff --git a/tests/tcltest.test b/tests/tcltest.test index 8a0174d..9da14de 100644 --- a/tests/tcltest.test +++ b/tests/tcltest.test @@ -552,7 +552,7 @@ switch -- $::tcl_platform(platform) { default { # note in FAT/NTFS we won't be able to protect directory with read-only attribute... catch {file attributes $notWriteableDir -readonly 1} - catch {testchmod 0 $notWriteableDir} + catch {testchmod 0o444 $notWriteableDir} } } test tcltest-8.3 {tcltest a.tcl -tmpdir notReadableDir} { diff --git a/tests/winFCmd.test b/tests/winFCmd.test index 500b114..b146253 100644 --- a/tests/winFCmd.test +++ b/tests/winFCmd.test @@ -47,15 +47,20 @@ proc contents {file} { set r } +proc cleanupRecurse {args} { + # Assumes no loops via links! + # Need to change permissions BEFORE deletion + testchmod 0o777 {*}$args + foreach victim $args { + if {[file isdirectory $victim]} { + cleanupRecurse {*}[glob -nocomplain -directory $victim td* tf* Test*] + } + file delete -force $victim + } +} proc cleanup {args} { - foreach p ". $args" { - set x "" - catch { - set x [glob -directory $p tf* td*] - } - if {$x != ""} { - catch {file delete -force -- {*}$x} - } + foreach p [list [pwd] {*}$args] { + cleanupRecurse {*}[glob -nocomplain -directory $p tf* td*] } } @@ -415,12 +420,12 @@ test winFCmd-1.38 {TclpRenameFile: check rename of conflicting inodes} -setup { cleanup } -constraints {win winNonZeroInodes notInCIenv} -body { file mkdir td1 - foreach {a b} [MakeFiles td1] break + lassign [MakeFiles td1] a b file rename -force $a $b file exists $a } -cleanup { cleanup -} -result {0} +} -result 0 test winFCmd-2.1 {TclpCopyFile: errno: EACCES} -setup { @@ -496,11 +501,11 @@ test winFCmd-2.12 {TclpCopyFile: CopyFile succeeds} -setup { cleanup } -constraints {win testfile} -body { createfile tf1 tf1 - testchmod 0 tf1 + file attribute tf1 -readonly 1 testfile cp tf1 tf2 list [contents tf2] [file writable tf2] } -cleanup { - catch {testchmod 0o666 tf1} + testchmod 0o660 tf1 cleanup } -result {tf1 0} test winFCmd-2.13 {TclpCopyFile: CopyFile fails} -setup { @@ -542,11 +547,10 @@ test winFCmd-2.17 {TclpCopyFile: dst is readonly} -setup { } -constraints {win testfile testchmod} -body { createfile tf1 tf1 createfile tf2 tf2 - testchmod 0 tf2 + file attribute tf2 -readonly 1 testfile cp tf1 tf2 list [file writable tf2] [contents tf2] } -cleanup { - catch {testchmod 0o666 tf2} cleanup } -result {1 tf1} @@ -624,7 +628,6 @@ test winFCmd-3.11 {TclpDeleteFile: still can't remove path} -setup { testfile rm tf1 } -cleanup { close $fd - catch {testchmod 0o666 tf1} cleanup } -returnCodes error -result EACCES @@ -664,14 +667,17 @@ test winFCmd-5.1 {TclpCopyDirectory: calls TraverseWinTree} -setup { test winFCmd-6.1 {TclpRemoveDirectory: errno: EACCES} -setup { cleanup } -constraints {winVista testfile testchmod notInCIenv} -body { - file mkdir td1 - testchmod 0 td1 - testfile rmdir td1 - file exists td1 + # Parent's FILE_DELETE_CHILD setting permits deletion of subdir + # even when subdir DELETE mask is clear. So we need an intermediate + # parent td0 with FILE_DELETE_CHILD turned off while allowing R/W. + file mkdir td0/td1 + testchmod 0o777 td0 + testchmod 0 td0/td1 + testfile rmdir td0/td1 + file exists td0/td1 } -returnCodes error -cleanup { - catch {testchmod 0o666 td1} cleanup -} -result {td1 EACCES} +} -result {td0/td1 EACCES} # This next test has a very hokey way of matching... test winFCmd-6.2 {TclpRemoveDirectory: errno: EEXIST} -setup { cleanup @@ -679,7 +685,7 @@ test winFCmd-6.2 {TclpRemoveDirectory: errno: EEXIST} -setup { file mkdir td1/td2 list [catch {testfile rmdir td1} msg] [file tail $msg] } -result {1 {td1 EEXIST}} -test winFCmd-6.3 {TclpRemoveDirectory: errno: EACCES} {win emptyTest} { +test winFCmd-6.3 {TclpRemoveDirectory: errno: EACCES} {win emptyTest trashSystem} { # can't test this w/o removing everything on your hard disk first! # testfile rmdir / } {} @@ -715,17 +721,7 @@ test winFCmd-6.8 {TclpRemoveDirectory: RemoveDirectory fails} -setup { createfile tf1 list [catch {testfile rmdir tf1} msg] [file tail $msg] } -result {1 {tf1 ENOTDIR}} -test winFCmd-6.9 {TclpRemoveDirectory: errno == EACCES} -setup { - cleanup -} -constraints {winVista testfile testchmod notInCIenv} -body { - file mkdir td1 - testchmod 0 td1 - testfile rmdir td1 - file exists td1 -} -returnCodes error -cleanup { - catch {testchmod 0o666 td1} - cleanup -} -result {td1 EACCES} +# winFCmd-6.9 removed - was exact dup of winFCmd-6.1 test winFCmd-6.11 {TclpRemoveDirectory: attr == -1} -setup { cleanup } -constraints {win nt testfile} -body { @@ -736,14 +732,18 @@ test winFCmd-6.11 {TclpRemoveDirectory: attr == -1} -setup { test winFCmd-6.13 {TclpRemoveDirectory: write-protected} -setup { cleanup } -constraints {winVista testfile testchmod notInCIenv} -body { - file mkdir td1 - testchmod 0 td1 - testfile rmdir td1 - file exists td1 -} -cleanup { - catch {testchmod 0o666 td1} - cleanup -} -returnCodes error -result {td1 EACCES} + # Parent's FILE_DELETE_CHILD setting permits deletion of subdir + # even when subdir DELETE mask is clear. So we need an intermediate + # parent td0 with FILE_DELETE_CHILD turned off while allowing R/W. + file mkdir td0/td1 + testchmod 0o770 td0 + testchmod 0o444 td0/td1 + testfile rmdir td0/td1 + file exists td0/td1 +} -cleanup { + testchmod 0o770 td0/td1 + cleanup +} -returnCodes error -result {td0/td1 EACCES} # This next test has a very hokey way of matching... test winFCmd-6.15 {TclpRemoveDirectory: !recursive} -setup { cleanup @@ -837,11 +837,12 @@ test winFCmd-7.11 {TraverseWinTree: call TraversalCopy: DOTREE_PRED} -setup { } -constraints {win testfile testchmod} -body { file mkdir td1 createfile td1/tf1 tf1 - testchmod 0 td1 + testchmod 0o770 td1/tf1; # Else tf2 will have no ACL after td1 testchmod + testchmod 0o400 td1 testfile cpdir td1 td2 list [file exists td2] [file writable td2] } -cleanup { - catch {testchmod 0o666 td1} + testchmod 0o660 td1 cleanup } -result {1 1} test winFCmd-7.12 {TraverseWinTree: call TraversalDelete: DOTREE_PRED} -setup { @@ -908,11 +909,12 @@ test winFCmd-7.19 {TraverseWinTree: call TraversalCopy: DOTREE_POSTD} -setup { } -constraints {win testfile testchmod} -body { file mkdir td1 createfile td1/tf1 tf1 - testchmod 0 td1 + testchmod 0o770 td1/tf1; # Else tf2 will have no ACL after td1 testchmod + testchmod 0o400 td1 testfile cpdir td1 td2 list [file exists td2] [file writable td2] } -cleanup { - catch {testchmod 0o666 td1} + testchmod 0o660 td1 cleanup } -result {1 1} test winFCmd-7.20 {TraverseWinTree: call TraversalDelete: DOTREE_POSTD} -setup { @@ -939,11 +941,12 @@ test winFCmd-8.2 {TraversalCopy: DOTREE_PRED} -setup { cleanup } -constraints {win testfile testchmod} -body { file mkdir td1/td2 - testchmod 0 td1 + testchmod 0o770 td1/td2; # Else td2 will have no ACL after td1 testchmod + testchmod 0o400 td1 testfile cpdir td1 td2 list [file writable td1] [file writable td1/td2] } -cleanup { - catch {testchmod 0o666 td1} + testchmod 0o660 td1 cleanup } -result {0 1} test winFCmd-8.3 {TraversalCopy: DOTREE_POSTD} -setup { @@ -965,14 +968,18 @@ test winFCmd-9.1 {TraversalDelete: DOTREE_F} -setup { test winFCmd-9.3 {TraversalDelete: DOTREE_PRED} -setup { cleanup } -constraints {winVista testfile testchmod notInCIenv} -body { - file mkdir td1/td2 - testchmod 0 td1 - testfile rmdir -force td1 + # Parent's FILE_DELETE_CHILD setting permits deletion of subdir + # even when subdir DELETE mask is clear. So we need an intermediate + # parent td0 with FILE_DELETE_CHILD turned off while allowing R/W. + file mkdir td0/td1/td2 + testchmod 0o770 td0 + testchmod 0o400 td0/td1 + testfile rmdir -force td0/td1 file exists td1 } -cleanup { - catch {testchmod 0o666 td1} + testchmod 0o770 td0/td1 cleanup -} -returnCodes error -result {td1 EACCES} +} -returnCodes error -result {td0/td1 EACCES} test winFCmd-9.4 {TraversalDelete: DOTREE_POSTD} -setup { cleanup } -constraints {win testfile} -body { @@ -1471,7 +1478,6 @@ test winFCmd-19.9 {Windows devices path names} -constraints {win nt} -body { # } #} -# cleanup cleanup ::tcltest::cleanupTests return diff --git a/win/tclWinTest.c b/win/tclWinTest.c index 357bbc5..0b4c8f6 100644 --- a/win/tclWinTest.c +++ b/win/tclWinTest.c @@ -17,9 +17,8 @@ /* * For TestplatformChmod on Windows */ -#ifdef _WIN32 #include -#endif +#include /* * MinGW 3.4.2 does not define this. @@ -416,176 +415,190 @@ TestExceptionCmd( return TCL_OK; } +/* + * This "chmod" works sufficiently for test script purposes. Do not expect + * it to be exact emulation of Unix chmod (not sure if that's even possible) + */ static int TestplatformChmod( const char *nativePath, int pmode) { - static const SECURITY_INFORMATION infoBits = OWNER_SECURITY_INFORMATION - | GROUP_SECURITY_INFORMATION | DACL_SECURITY_INFORMATION; - /* don't reset change permissions mask (WRITE_DAC, allow test-cases restore it to cleanup) */ - static const DWORD readOnlyMask = FILE_DELETE_CHILD | FILE_ADD_FILE - | FILE_ADD_SUBDIRECTORY | FILE_WRITE_EA | FILE_APPEND_DATA - | FILE_WRITE_DATA - | DELETE; - - /* - * References to security functions (only available on NT and later). + /* + * Note FILE_DELETE_CHILD missing from dirWriteMask because we do + * not want overriding of child's delete setting when testing */ - - const BOOL set_readOnly = !(pmode & 0222); - BOOL acl_readOnly_found = FALSE, curAclPresent, curAclDefaulted; - SID_IDENTIFIER_AUTHORITY userSidAuthority = { - SECURITY_WORLD_SID_AUTHORITY - }; - BYTE *secDesc = 0; - DWORD secDescLen, attr, newAclSize; - ACL_SIZE_INFORMATION ACLSize; - PACL curAcl, newAcl = 0; - WORD j; - SID *userSid = 0; - char *userDomain = 0; + static const DWORD dirWriteMask = + FILE_WRITE_ATTRIBUTES | FILE_WRITE_EA | + FILE_ADD_FILE | FILE_ADD_SUBDIRECTORY | STANDARD_RIGHTS_WRITE | DELETE | + SYNCHRONIZE; + static const DWORD dirReadMask = + FILE_READ_ATTRIBUTES | FILE_READ_EA | FILE_LIST_DIRECTORY | + STANDARD_RIGHTS_READ | SYNCHRONIZE; + /* Note - default user privileges allow ignoring TRAVERSE setting */ + static const DWORD dirExecuteMask = + FILE_TRAVERSE | STANDARD_RIGHTS_READ | SYNCHRONIZE; + + static const DWORD fileWriteMask = + FILE_WRITE_ATTRIBUTES | FILE_WRITE_EA | FILE_WRITE_DATA | + FILE_APPEND_DATA | STANDARD_RIGHTS_WRITE | DELETE | SYNCHRONIZE; + static const DWORD fileReadMask = + FILE_READ_ATTRIBUTES | FILE_READ_EA | FILE_READ_DATA | + STANDARD_RIGHTS_READ | SYNCHRONIZE; + static const DWORD fileExecuteMask = + FILE_EXECUTE | STANDARD_RIGHTS_READ | SYNCHRONIZE; + + DWORD attr, newAclSize; + PACL newAcl = NULL; int res = 0; - - /* - * Process the chmod request. - */ + SID_IDENTIFIER_AUTHORITY worldAuthority = SECURITY_WORLD_SID_AUTHORITY; + + HANDLE hToken = NULL; + int i; + int nSids = 0; + struct { + PSID pSid; + DWORD mask; + DWORD sidLen; + } aceEntry[3]; + DWORD dw; + int isDir; + TOKEN_USER *pTokenUser = NULL; + + res = -1; /* Assume failure */ attr = GetFileAttributesA(nativePath); - - /* - * nativePath not found - */ - if (attr == 0xFFFFFFFF) { - res = -1; - goto done; + goto done; /* Not found */ } - /* - * If nativePath is not a directory, there is no special handling. - */ + isDir = (attr & FILE_ATTRIBUTE_DIRECTORY) != 0; - if (!(attr & FILE_ATTRIBUTE_DIRECTORY)) { + if (!OpenProcessToken(GetCurrentProcess(), TOKEN_QUERY, &hToken)) { goto done; } - - /* - * Set the result to error, if the ACL change is successful it will be - * reset to 0. - */ - - res = -1; - - /* - * Read the security descriptor for the directory. Note the first call - * obtains the size of the security descriptor. - */ - - if (!GetFileSecurityA(nativePath, infoBits, NULL, 0, &secDescLen)) { - DWORD secDescLen2 = 0; - - if (GetLastError() != ERROR_INSUFFICIENT_BUFFER) { - goto done; - } - - secDesc = ckalloc(secDescLen); - if (!GetFileSecurityA(nativePath, infoBits, - (PSECURITY_DESCRIPTOR) secDesc, secDescLen, &secDescLen2) - || (secDescLen < secDescLen2)) { - goto done; - } - } - - /* - * Get the World SID. - */ - - userSid = ckalloc(GetSidLengthRequired((UCHAR) 1)); - InitializeSid(userSid, &userSidAuthority, (BYTE) 1); - *(GetSidSubAuthority(userSid, 0)) = SECURITY_WORLD_RID; - - /* - * If curAclPresent == false then curAcl and curAclDefaulted not valid. - */ - - if (!GetSecurityDescriptorDacl((PSECURITY_DESCRIPTOR) secDesc, - &curAclPresent, &curAcl, &curAclDefaulted)) { + + /* Get process SID */ + if (!GetTokenInformation(hToken, TokenUser, NULL, 0, &dw) && + GetLastError() != ERROR_INSUFFICIENT_BUFFER) { goto done; } - if (!curAclPresent || !curAcl) { - ACLSize.AclBytesInUse = 0; - ACLSize.AceCount = 0; - } else if (!GetAclInformation(curAcl, &ACLSize, sizeof(ACLSize), - AclSizeInformation)) { + pTokenUser = ckalloc(dw); + if (!GetTokenInformation(hToken, TokenUser, pTokenUser, dw, &dw)) { goto done; } - - /* - * Allocate memory for the new ACL. - */ - - newAclSize = ACLSize.AclBytesInUse + sizeof(ACCESS_DENIED_ACE) - + GetLengthSid(userSid) - sizeof(DWORD); - newAcl = ckalloc(newAclSize); - - /* - * Initialize the new ACL. - */ - - if (!InitializeAcl(newAcl, newAclSize, ACL_REVISION)) { + aceEntry[nSids].sidLen = GetLengthSid(pTokenUser->User.Sid); + aceEntry[nSids].pSid = ckalloc(aceEntry[nSids].sidLen); + if (!CopySid(aceEntry[nSids].sidLen, + aceEntry[nSids].pSid, + pTokenUser->User.Sid)) { + ckfree(aceEntry[nSids].pSid); /* Since we have not ++'ed nSids */ goto done; } - - /* - * Add denied to make readonly, this will be known as a "read-only tag". + /* + * Always include DACL modify rights so we don't get locked out */ - - if (set_readOnly && !AddAccessDeniedAce(newAcl, ACL_REVISION, - readOnlyMask, userSid)) { - goto done; + aceEntry[nSids].mask = READ_CONTROL | WRITE_DAC | WRITE_OWNER | SYNCHRONIZE | + FILE_READ_ATTRIBUTES | FILE_WRITE_ATTRIBUTES; + if (pmode & 0700) { + /* Owner permissions. Assumes current process is owner */ + if (pmode & 0400) { + aceEntry[nSids].mask |= isDir ? dirReadMask : fileReadMask; + } + if (pmode & 0200) { + aceEntry[nSids].mask |= isDir ? dirWriteMask : fileWriteMask; + } + if (pmode & 0100) { + aceEntry[nSids].mask |= isDir ? dirExecuteMask : fileExecuteMask; + } } + ++nSids; + + if (pmode & 0070) { + /* Group permissions. */ - acl_readOnly_found = FALSE; - for (j = 0; j < ACLSize.AceCount; j++) { - LPVOID pACE2; - ACE_HEADER *phACE2; + TOKEN_PRIMARY_GROUP *pTokenGroup; - if (!GetAce(curAcl, j, &pACE2)) { + /* Get primary group SID */ + if (!GetTokenInformation( + hToken, TokenPrimaryGroup, NULL, 0, &dw) && + GetLastError() != ERROR_INSUFFICIENT_BUFFER) { goto done; } + pTokenGroup = ckalloc(dw); + if (!GetTokenInformation(hToken, TokenPrimaryGroup, pTokenGroup, dw, &dw)) { + ckfree(pTokenGroup); + goto done; + } + aceEntry[nSids].sidLen = GetLengthSid(pTokenGroup->PrimaryGroup); + aceEntry[nSids].pSid = ckalloc(aceEntry[nSids].sidLen); + if (!CopySid(aceEntry[nSids].sidLen, aceEntry[nSids].pSid, pTokenGroup->PrimaryGroup)) { + ckfree(pTokenGroup); + ckfree(aceEntry[nSids].pSid); /* Since we have not ++'ed nSids */ + goto done; + } + ckfree(pTokenGroup); - phACE2 = (ACE_HEADER *) pACE2; + /* Generate mask for group ACL */ - /* - * Do NOT propagate inherited ACEs. - */ - - if (phACE2->AceFlags & INHERITED_ACE) { - continue; + aceEntry[nSids].mask = 0; + if (pmode & 0040) { + aceEntry[nSids].mask |= isDir ? dirReadMask : fileReadMask; + } + if (pmode & 0020) { + aceEntry[nSids].mask |= isDir ? dirWriteMask : fileWriteMask; + } + if (pmode & 0010) { + aceEntry[nSids].mask |= isDir ? dirExecuteMask : fileExecuteMask; } + ++nSids; + } - /* - * Skip the "read-only tag" restriction (either added above, or it is - * being removed). - */ + if (pmode & 0007) { + /* World permissions */ + PSID pWorldSid; + if (!ConvertStringSidToSidA("S-1-1-0", &pWorldSid)) { + goto done; + } + aceEntry[nSids].sidLen = GetLengthSid(pWorldSid); + aceEntry[nSids].pSid = ckalloc(aceEntry[nSids].sidLen); + if (!CopySid(aceEntry[nSids].sidLen, aceEntry[nSids].pSid, pWorldSid)) { + LocalFree(pWorldSid); + ckfree(aceEntry[nSids].pSid); /* Since we have not ++'ed nSids */ + goto done; + } + LocalFree(pWorldSid); - if (phACE2->AceType == ACCESS_DENIED_ACE_TYPE) { - ACCESS_DENIED_ACE *pACEd = (ACCESS_DENIED_ACE *) phACE2; + /* Generate mask for world ACL */ - if (pACEd->Mask == readOnlyMask - && EqualSid(userSid, (PSID) &pACEd->SidStart)) { - acl_readOnly_found = TRUE; - continue; - } + aceEntry[nSids].mask = 0; + if (pmode & 0004) { + aceEntry[nSids].mask |= isDir ? dirReadMask : fileReadMask; } + if (pmode & 0002) { + aceEntry[nSids].mask |= isDir ? dirWriteMask : fileWriteMask; + } + if (pmode & 0001) { + aceEntry[nSids].mask |= isDir ? dirExecuteMask : fileExecuteMask; + } + ++nSids; + } - /* - * Copy the current ACE from the old to the new ACL. - */ + /* Allocate memory and initialize the new ACL. */ - if (!AddAce(newAcl, ACL_REVISION, MAXDWORD, (PACL *) pACE2, - ((PACE_HEADER) pACE2)->AceSize)) { + newAclSize = sizeof(ACL); + /* Add in size required for each ACE entry in the ACL */ + for (i = 0; i < nSids; ++i) { + newAclSize += + offsetof(ACCESS_ALLOWED_ACE, SidStart) + aceEntry[i].sidLen; + } + newAcl = ckalloc(newAclSize); + if (!InitializeAcl(newAcl, newAclSize, ACL_REVISION)) { + goto done; + } + + for (i = 0; i < nSids; ++i) { + if (!AddAccessAllowedAce(newAcl, ACL_REVISION, aceEntry[i].mask, aceEntry[i].pSid)) { goto done; } } @@ -595,35 +608,36 @@ TestplatformChmod( * to remove inherited ACL (we need to overwrite the default ACL's in this case) */ - if (set_readOnly == acl_readOnly_found || SetNamedSecurityInfoA( - (LPSTR) nativePath, SE_FILE_OBJECT, - DACL_SECURITY_INFORMATION /*| PROTECTED_DACL_SECURITY_INFORMATION*/, - NULL, NULL, newAcl, NULL) == ERROR_SUCCESS) { + if (SetNamedSecurityInfoA((LPSTR)nativePath, + SE_FILE_OBJECT, + DACL_SECURITY_INFORMATION | + PROTECTED_DACL_SECURITY_INFORMATION, + NULL, + NULL, + newAcl, + NULL) == ERROR_SUCCESS) { res = 0; } done: - if (secDesc) { - ckfree(secDesc); + if (pTokenUser) { + ckfree(pTokenUser); + } + if (hToken) { + CloseHandle(hToken); } if (newAcl) { ckfree(newAcl); } - if (userSid) { - ckfree(userSid); - } - if (userDomain) { - ckfree(userDomain); + for (i = 0; i < nSids; ++i) { + ckfree(aceEntry[i].pSid); } if (res != 0) { return res; } - /* - * Run normal chmod command. - */ - + /* Run normal chmod command */ return chmod(nativePath, pmode); } -- cgit v0.12 From 5cf1eed9106acd1a6e751b414506b0e38f6a79a7 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Tue, 7 Mar 2023 17:56:45 +0000 Subject: Fix a few -Wconversion warnings --- generic/tclDecls.h | 2 +- win/tclWin32Dll.c | 8 +++--- win/tclWinChan.c | 47 +++++++++++++++++++--------------- win/tclWinConsole.c | 20 ++++++++++----- win/tclWinFile.c | 74 ++++++++++++++++++++++++++--------------------------- 5 files changed, 81 insertions(+), 70 deletions(-) diff --git a/generic/tclDecls.h b/generic/tclDecls.h index 8fc926c..6c109de 100644 --- a/generic/tclDecls.h +++ b/generic/tclDecls.h @@ -4042,7 +4042,7 @@ extern const TclStubs *tclStubsPtr; _t.reserved = -1; \ tclStubsPtr->tcl_GetTime((&_t.now)); \ if (_t.reserved != -1) { \ - _t.now.usec = _t.reserved; \ + _t.now.usec = (long) _t.reserved; \ } \ *(t) = _t.now; \ } while (0) diff --git a/win/tclWin32Dll.c b/win/tclWin32Dll.c index 9e83b46..d418b56 100644 --- a/win/tclWin32Dll.c +++ b/win/tclWin32Dll.c @@ -433,7 +433,7 @@ TclWinDriveLetterForVolMountPoint( if (!alreadyStored) { dlPtr2 = (MountPointMap *)ckalloc(sizeof(MountPointMap)); dlPtr2->volumeName = (WCHAR *)TclNativeDupInternalRep(Target); - dlPtr2->driveLetter = (char) drive[0]; + dlPtr2->driveLetter = (WCHAR) drive[0]; dlPtr2->nextPtr = driveLetterLookup; driveLetterLookup = dlPtr2; } @@ -459,7 +459,7 @@ TclWinDriveLetterForVolMountPoint( dlPtr2 = (MountPointMap *)ckalloc(sizeof(MountPointMap)); dlPtr2->volumeName = (WCHAR *)TclNativeDupInternalRep((void *)mountPoint); - dlPtr2->driveLetter = -1; + dlPtr2->driveLetter = (WCHAR)-1; dlPtr2->nextPtr = driveLetterLookup; driveLetterLookup = dlPtr2; Tcl_MutexUnlock(&mountPointMap); @@ -600,7 +600,7 @@ Tcl_WinTCharToUtf( return NULL; } if (len < 0) { - len = wcslen((WCHAR *)string); + len = (int)wcslen((WCHAR *)string); } else { len /= 2; } @@ -663,7 +663,7 @@ TclWinCPUID( #if defined(HAVE_INTRIN_H) && defined(_WIN64) && defined(HAVE_CPUID) - __cpuid((int *)regsPtr, index); + __cpuid((int *)regsPtr, (int)index); status = TCL_OK; #elif defined(__GNUC__) && defined(HAVE_CPUID) diff --git a/win/tclWinChan.c b/win/tclWinChan.c index 573ac7d..3a3eba4 100644 --- a/win/tclWinChan.c +++ b/win/tclWinChan.c @@ -174,6 +174,8 @@ static void FileChannelExitHandler( ClientData clientData) /* Old window proc */ { + (void)clientData; + Tcl_DeleteEventSource(FileSetupProc, FileCheckProc, NULL); } @@ -202,6 +204,7 @@ FileSetupProc( FileInfo *infoPtr; Tcl_Time blockTime = { 0, 0 }; ThreadSpecificData *tsdPtr = TCL_TSD_INIT(&dataKey); + (void)data; if (!(flags & TCL_FILE_EVENTS)) { return; @@ -245,6 +248,7 @@ FileCheckProc( FileEvent *evPtr; FileInfo *infoPtr; ThreadSpecificData *tsdPtr = TCL_TSD_INIT(&dataKey); + (void)data; if (!(flags & TCL_FILE_EVENTS)) { return; @@ -259,7 +263,7 @@ FileCheckProc( infoPtr = infoPtr->nextPtr) { if (infoPtr->watchMask && !(infoPtr->flags & FILE_PENDING)) { infoPtr->flags |= FILE_PENDING; - evPtr = ckalloc(sizeof(FileEvent)); + evPtr = (FileEvent *)ckalloc(sizeof(FileEvent)); evPtr->header.proc = FileEventProc; evPtr->infoPtr = infoPtr; Tcl_QueueEvent((Tcl_Event *) evPtr, TCL_QUEUE_TAIL); @@ -342,7 +346,7 @@ FileBlockProc( int mode) /* TCL_MODE_BLOCKING or * TCL_MODE_NONBLOCKING. */ { - FileInfo *infoPtr = instanceData; + FileInfo *infoPtr = (FileInfo *)instanceData; /* * Files on Windows can not be switched between blocking and nonblocking, @@ -380,10 +384,11 @@ FileCloseProc( ClientData instanceData, /* Pointer to FileInfo structure. */ Tcl_Interp *interp) /* Not used. */ { - FileInfo *fileInfoPtr = instanceData; + FileInfo *fileInfoPtr = (FileInfo *)instanceData; FileInfo *infoPtr; ThreadSpecificData *tsdPtr; int errorCode = 0; + (void)interp; /* * Remove the file from the watch list. @@ -467,7 +472,7 @@ FileSeekProc( int mode, /* Relative to where should we seek? */ int *errorCodePtr) /* To store error code. */ { - FileInfo *infoPtr = instanceData; + FileInfo *infoPtr = (FileInfo *)instanceData; LONG newPos, newPosHigh, oldPos, oldPosHigh; DWORD moveMethod; @@ -485,7 +490,7 @@ FileSeekProc( */ oldPosHigh = 0; - oldPos = SetFilePointer(infoPtr->handle, 0, &oldPosHigh, FILE_CURRENT); + oldPos = (int)SetFilePointer(infoPtr->handle, 0, &oldPosHigh, FILE_CURRENT); if (oldPos == (LONG)INVALID_SET_FILE_POINTER) { DWORD winError = GetLastError(); @@ -497,7 +502,7 @@ FileSeekProc( } newPosHigh = (offset < 0 ? -1 : 0); - newPos = SetFilePointer(infoPtr->handle, offset, &newPosHigh, moveMethod); + newPos = (int)SetFilePointer(infoPtr->handle, offset, &newPosHigh, moveMethod); if (newPos == (LONG)INVALID_SET_FILE_POINTER) { DWORD winError = GetLastError(); @@ -545,7 +550,7 @@ FileWideSeekProc( int mode, /* Relative to where should we seek? */ int *errorCodePtr) /* To store error code. */ { - FileInfo *infoPtr = instanceData; + FileInfo *infoPtr = (FileInfo *)instanceData; DWORD moveMethod; LONG newPos, newPosHigh; @@ -559,7 +564,7 @@ FileWideSeekProc( } newPosHigh = Tcl_WideAsLong(offset >> 32); - newPos = SetFilePointer(infoPtr->handle, Tcl_WideAsLong(offset), + newPos = (int)SetFilePointer(infoPtr->handle, Tcl_WideAsLong(offset), &newPosHigh, moveMethod); if (newPos == (LONG)INVALID_SET_FILE_POINTER) { DWORD winError = GetLastError(); @@ -594,7 +599,7 @@ FileTruncateProc( ClientData instanceData, /* File state. */ Tcl_WideInt length) /* Length to truncate at. */ { - FileInfo *infoPtr = instanceData; + FileInfo *infoPtr = (FileInfo *)instanceData; LONG newPos, newPosHigh, oldPos, oldPosHigh; /* @@ -602,7 +607,7 @@ FileTruncateProc( */ oldPosHigh = 0; - oldPos = SetFilePointer(infoPtr->handle, 0, &oldPosHigh, FILE_CURRENT); + oldPos = (int)SetFilePointer(infoPtr->handle, 0, &oldPosHigh, FILE_CURRENT); if (oldPos == (LONG)INVALID_SET_FILE_POINTER) { DWORD winError = GetLastError(); if (winError != NO_ERROR) { @@ -616,7 +621,7 @@ FileTruncateProc( */ newPosHigh = Tcl_WideAsLong(length >> 32); - newPos = SetFilePointer(infoPtr->handle, Tcl_WideAsLong(length), + newPos = (int)SetFilePointer(infoPtr->handle, Tcl_WideAsLong(length), &newPosHigh, FILE_BEGIN); if (newPos == (LONG)INVALID_SET_FILE_POINTER) { DWORD winError = GetLastError(); @@ -670,7 +675,7 @@ FileInputProc( int bufSize, /* Num bytes available in buffer. */ int *errorCode) /* Where to store error code. */ { - FileInfo *infoPtr = instanceData; + FileInfo *infoPtr = (FileInfo *)instanceData; DWORD bytesRead; *errorCode = 0; @@ -689,7 +694,7 @@ FileInputProc( if (ReadFile(infoPtr->handle, (LPVOID) buf, (DWORD) bufSize, &bytesRead, (LPOVERLAPPED) NULL) != FALSE) { - return bytesRead; + return (int)bytesRead; } TclWinConvertError(GetLastError()); @@ -725,7 +730,7 @@ FileOutputProc( int toWrite, /* How many bytes to write? */ int *errorCode) /* Where to store error code. */ { - FileInfo *infoPtr = instanceData; + FileInfo *infoPtr = (FileInfo *)instanceData; DWORD bytesWritten; *errorCode = 0; @@ -746,7 +751,7 @@ FileOutputProc( return -1; } infoPtr->dirty = 1; - return bytesWritten; + return (int)bytesWritten; } /* @@ -772,7 +777,7 @@ FileWatchProc( * of TCL_READABLE, TCL_WRITABLE and * TCL_EXCEPTION. */ { - FileInfo *infoPtr = instanceData; + FileInfo *infoPtr = (FileInfo *)instanceData; Tcl_Time blockTime = { 0, 0 }; /* @@ -810,7 +815,7 @@ FileGetHandleProc( int direction, /* TCL_READABLE or TCL_WRITABLE */ ClientData *handlePtr) /* Where to store the handle. */ { - FileInfo *infoPtr = instanceData; + FileInfo *infoPtr = (FileInfo *)instanceData; if (direction & infoPtr->validMask) { *handlePtr = (ClientData) infoPtr->handle; @@ -855,7 +860,7 @@ TclpOpenFileChannel( char channelName[16 + TCL_INTEGER_SPACE]; TclFile readFile = NULL, writeFile = NULL; - nativeName = Tcl_FSGetNativePath(pathPtr); + nativeName = (const WCHAR *)Tcl_FSGetNativePath(pathPtr); if (nativeName == NULL) { if (interp != (Tcl_Interp *) NULL) { Tcl_AppendResult(interp, "couldn't open \"", @@ -1363,7 +1368,7 @@ TclWinOpenFileChannel( } } - infoPtr = ckalloc(sizeof(FileInfo)); + infoPtr = (FileInfo *)ckalloc(sizeof(FileInfo)); /* * TIP #218. Removed the code inserting the new structure into the global @@ -1454,7 +1459,7 @@ FileThreadActionProc( int action) { ThreadSpecificData *tsdPtr = TCL_TSD_INIT(&dataKey); - FileInfo *infoPtr = instanceData; + FileInfo *infoPtr = ( FileInfo *)instanceData; if (action == TCL_CHANNEL_THREAD_INSERT) { infoPtr->nextPtr = tsdPtr->firstFilePtr; @@ -1557,7 +1562,7 @@ NativeIsComPort( const WCHAR *nativePath) /* Path of file to access, native encoding. */ { const WCHAR *p = (const WCHAR *) nativePath; - int i, len = wcslen(p); + int i, len = (int)wcslen(p); /* * 1. Look for com[1-9]:? diff --git a/win/tclWinConsole.c b/win/tclWinConsole.c index bb5166b..41a05ad 100644 --- a/win/tclWinConsole.c +++ b/win/tclWinConsole.c @@ -312,6 +312,8 @@ static void ConsoleExitHandler( ClientData clientData) /* Old window proc. */ { + (void)clientData; + Tcl_DeleteEventSource(ConsoleSetupProc, ConsoleCheckProc, NULL); } @@ -336,6 +338,8 @@ static void ProcExitHandler( ClientData clientData) /* Old window proc. */ { + (void)clientData; + Tcl_MutexLock(&consoleMutex); initialized = 0; Tcl_MutexUnlock(&consoleMutex); @@ -367,6 +371,7 @@ ConsoleSetupProc( Tcl_Time blockTime = { 0, 0 }; int block = 1; ThreadSpecificData *tsdPtr = TCL_TSD_INIT(&dataKey); + (void)data; if (!(flags & TCL_FILE_EVENTS)) { return; @@ -737,7 +742,7 @@ ConsoleOutputProc( int toWrite, /* How many bytes to write? */ int *errorCode) /* Where to store error code. */ { - ConsoleInfo *infoPtr = instanceData; + ConsoleInfo *infoPtr = (ConsoleInfo *)instanceData; ConsoleThreadInfo *threadInfo = &infoPtr->writer; DWORD bytesWritten, timeout; @@ -781,7 +786,7 @@ ConsoleOutputProc( ckfree(infoPtr->writeBuf); } infoPtr->writeBufLen = toWrite; - infoPtr->writeBuf = ckalloc(toWrite); + infoPtr->writeBuf = (char *)ckalloc(toWrite); } memcpy(infoPtr->writeBuf, buf, toWrite); infoPtr->toWrite = toWrite; @@ -922,7 +927,7 @@ ConsoleWatchProc( * TCL_EXCEPTION. */ { ConsoleInfo **nextPtrPtr, *ptr; - ConsoleInfo *infoPtr = instanceData; + ConsoleInfo *infoPtr = (ConsoleInfo *)instanceData; int oldMask = infoPtr->watchMask; ThreadSpecificData *tsdPtr = TCL_TSD_INIT(&dataKey); @@ -980,7 +985,8 @@ ConsoleGetHandleProc( int direction, /* TCL_READABLE or TCL_WRITABLE. */ ClientData *handlePtr) /* Where to store the handle. */ { - ConsoleInfo *infoPtr = instanceData; + ConsoleInfo *infoPtr = (ConsoleInfo *)instanceData; + (void)direction; *handlePtr = infoPtr->handle; return TCL_OK; @@ -1014,7 +1020,7 @@ WaitForRead( * or not. */ { DWORD timeout, count; - HANDLE *handle = infoPtr->handle; + HANDLE *handle = (HANDLE *)infoPtr->handle; ConsoleThreadInfo *threadInfo = &infoPtr->reader; INPUT_RECORD input; @@ -1315,7 +1321,7 @@ TclWinOpenConsoleChannel( * See if a channel with this handle already exists. */ - infoPtr = ckalloc(sizeof(ConsoleInfo)); + infoPtr = (ConsoleInfo *)ckalloc(sizeof(ConsoleInfo)); memset(infoPtr, 0, sizeof(ConsoleInfo)); infoPtr->validMask = permissions; @@ -1397,7 +1403,7 @@ ConsoleThreadActionProc( ClientData instanceData, int action) { - ConsoleInfo *infoPtr = instanceData; + ConsoleInfo *infoPtr = (ConsoleInfo *)instanceData; /* * We do not access firstConsolePtr in the thread structures. This is not diff --git a/win/tclWinFile.c b/win/tclWinFile.c index 639cd72..a6f27c9 100644 --- a/win/tclWinFile.c +++ b/win/tclWinFile.c @@ -987,7 +987,7 @@ TclpMatchInDirectory( * Verify that the specified path exists and is actually a directory. */ - native = Tcl_FSGetNativePath(pathPtr); + native = (const WCHAR *)Tcl_FSGetNativePath(pathPtr); if (native == NULL) { return TCL_OK; } @@ -1477,24 +1477,23 @@ TclpGetUserHome( */ ptr = TclpGetUserName(&ds); if (ptr != NULL && strcasecmp(name, ptr) == 0) { - HANDLE hProcess; - WCHAR buf[MAX_PATH]; - DWORD nChars = sizeof(buf) / sizeof(buf[0]); - /* Sadly GetCurrentProcessToken not in Win 7 so slightly longer */ - hProcess = GetCurrentProcess(); /* Need not be closed */ - if (hProcess) { - HANDLE hToken; - if (OpenProcessToken(hProcess, TOKEN_QUERY, &hToken)) { - if (GetUserProfileDirectoryW(hToken, buf, &nChars)) { - Tcl_WinTCharToUtf((TCHAR *)buf, - (nChars-1)*sizeof(WCHAR), - bufferPtr); - result = Tcl_DStringValue(bufferPtr); - rc = 1; - } - CloseHandle(hToken); - } - } + HANDLE hProcess; + WCHAR buf[MAX_PATH]; + DWORD nChars = sizeof(buf) / sizeof(buf[0]); + /* Sadly GetCurrentProcessToken not in Win 7 so slightly longer */ + hProcess = GetCurrentProcess(); /* Need not be closed */ + if (hProcess) { + HANDLE hToken; + if (OpenProcessToken(hProcess, TOKEN_QUERY, &hToken)) { + if (GetUserProfileDirectoryW(hToken, buf, &nChars)) { + Tcl_WinTCharToUtf((TCHAR *)buf, + (nChars-1)*sizeof(WCHAR), bufferPtr); + result = Tcl_DStringValue(bufferPtr); + rc = 1; + } + CloseHandle(hToken); + } + } } Tcl_DStringFree(&ds); } else { @@ -1524,7 +1523,7 @@ TclpGetUserHome( if (rc != 0) { break; } - domain = INT2PTR(-1); /* repeat once */ + domain = (const char *)INT2PTR(-1); /* repeat once */ } if (rc == 0) { DWORD i, size = MAX_PATH; @@ -1919,7 +1918,7 @@ TclpObjChdir( int result; const WCHAR *nativePath; - nativePath = Tcl_FSGetNativePath(pathPtr); + nativePath = (const WCHAR *)Tcl_FSGetNativePath(pathPtr); if (!nativePath) { return -1; @@ -2011,7 +2010,7 @@ TclpObjStat( TclWinFlushDirtyChannels(); - return NativeStat(Tcl_FSGetNativePath(pathPtr), statPtr, 0); + return NativeStat((const WCHAR *)Tcl_FSGetNativePath(pathPtr), statPtr, 0); } /* @@ -2204,7 +2203,7 @@ NativeDev( p = strchr(p + 1, '\\'); if (p == NULL) { /* - * Add terminating backslash to fullpath or GetVolumeInformation() + * Add terminating backslash to fullpath or GetVolumeInformationW() * won't work. */ @@ -2380,7 +2379,7 @@ TclpObjAccess( Tcl_Obj *pathPtr, int mode) { - return NativeAccess(Tcl_FSGetNativePath(pathPtr), mode); + return NativeAccess((const WCHAR *)Tcl_FSGetNativePath(pathPtr), mode); } int @@ -2396,7 +2395,7 @@ TclpObjLstat( TclWinFlushDirtyChannels(); - return NativeStat(Tcl_FSGetNativePath(pathPtr), statPtr, 1); + return NativeStat((const WCHAR *)Tcl_FSGetNativePath(pathPtr), statPtr, 1); } #ifdef S_IFLNK @@ -2409,14 +2408,14 @@ TclpObjLink( if (toPtr != NULL) { int res; const WCHAR *LinkTarget; - const WCHAR *LinkSource = Tcl_FSGetNativePath(pathPtr); + const WCHAR *LinkSource = (const WCHAR *)Tcl_FSGetNativePath(pathPtr); Tcl_Obj *normalizedToPtr = Tcl_FSGetNormalizedPath(NULL, toPtr); if (normalizedToPtr == NULL) { return NULL; } - LinkTarget = Tcl_FSGetNativePath(normalizedToPtr); + LinkTarget = (const WCHAR *)Tcl_FSGetNativePath(normalizedToPtr); if (LinkSource == NULL || LinkTarget == NULL) { return NULL; @@ -2428,7 +2427,7 @@ TclpObjLink( return NULL; } } else { - const WCHAR *LinkSource = Tcl_FSGetNativePath(pathPtr); + const WCHAR *LinkSource = (const WCHAR *)Tcl_FSGetNativePath(pathPtr); if (LinkSource == NULL) { return NULL; @@ -2477,13 +2476,13 @@ TclpFilesystemPathType( firstSeparator = strchr(path, '/'); if (firstSeparator == NULL) { - found = GetVolumeInformationW(Tcl_FSGetNativePath(pathPtr), + found = GetVolumeInformationW((const WCHAR *)Tcl_FSGetNativePath(pathPtr), NULL, 0, NULL, NULL, NULL, volType, VOL_BUF_SIZE); } else { Tcl_Obj *driveName = Tcl_NewStringObj(path, firstSeparator - path+1); Tcl_IncrRefCount(driveName); - found = GetVolumeInformationW(Tcl_FSGetNativePath(driveName), + found = GetVolumeInformationW((const WCHAR *)Tcl_FSGetNativePath(driveName), NULL, 0, NULL, NULL, NULL, volType, VOL_BUF_SIZE); Tcl_DecrRefCount(driveName); } @@ -2536,7 +2535,7 @@ TclpFilesystemPathType( int TclpObjNormalizePath( - Tcl_Interp *interp, + Tcl_Interp *interp, /* not used */ Tcl_Obj *pathPtr, /* An unshared object containing the path to * normalize */ int nextCheckpoint) /* offset to start at in pathPtr */ @@ -2547,6 +2546,7 @@ TclpObjNormalizePath( Tcl_Obj *temp = NULL; int isDrive = 1; Tcl_DString ds; /* Some workspace. */ + (void)interp; Tcl_DStringInit(&dsNorm); path = Tcl_GetString(pathPtr); @@ -2584,7 +2584,7 @@ TclpObjNormalizePath( int i; for (i=0 ; i= 'a') { wc -= ('a' - 'A'); @@ -3101,7 +3101,7 @@ TclNativeCreateNativeRep( * Overallocate 6 chars, making some room for extended paths */ - wp = nativePathPtr = ckalloc((len + 6) * sizeof(WCHAR)); + wp = nativePathPtr = (WCHAR *)ckalloc((len + 6) * sizeof(WCHAR)); if (nativePathPtr==0) { goto done; } @@ -3200,7 +3200,7 @@ TclNativeDupInternalRep( len = sizeof(WCHAR) * (wcslen((const WCHAR *) clientData) + 1); - copy = ckalloc(len); + copy = (char *)ckalloc(len); memcpy(copy, clientData, len); return copy; } @@ -3237,7 +3237,7 @@ TclpUtime( FromCTime(tval->actime, &lastAccessTime); FromCTime(tval->modtime, &lastModTime); - native = Tcl_FSGetNativePath(pathPtr); + native = (const WCHAR *)Tcl_FSGetNativePath(pathPtr); attr = GetFileAttributesW(native); @@ -3288,7 +3288,7 @@ TclWinFileOwned( DWORD bufsz; int owned = 0; - native = Tcl_FSGetNativePath(pathPtr); + native = (const WCHAR *)Tcl_FSGetNativePath(pathPtr); if (GetNamedSecurityInfoW((LPWSTR) native, SE_FILE_OBJECT, OWNER_SECURITY_INFORMATION, &ownerSid, NULL, NULL, NULL, @@ -3316,7 +3316,7 @@ TclWinFileOwned( bufsz = 0; GetTokenInformation(token, TokenUser, NULL, 0, &bufsz); if (bufsz) { - buf = ckalloc(bufsz); + buf = (LPBYTE)ckalloc(bufsz); if (GetTokenInformation(token, TokenUser, buf, bufsz, &bufsz)) { owned = EqualSid(ownerSid, ((PTOKEN_USER) buf)->User.Sid); } -- cgit v0.12 From ebfa9b6f2dab96c8ae9b1216ecc832b6b263fd98 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Wed, 8 Mar 2023 09:36:14 +0000 Subject: ckalloc -> Tcl_Alloc and ckfree -> Tcl_Free --- generic/tclTestObj.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/generic/tclTestObj.c b/generic/tclTestObj.c index 820da0e..7accb9b 100644 --- a/generic/tclTestObj.c +++ b/generic/tclTestObj.c @@ -1498,7 +1498,7 @@ TeststringobjCmd( Tcl_SetObjResult(interp, varPtr[varIndex]); break; case 13: /* newunicode*/ - unicode = (Tcl_UniChar *) ckalloc((objc - 3) * sizeof(Tcl_UniChar)); + unicode = (Tcl_UniChar *) Tcl_Alloc((objc - 3) * sizeof(Tcl_UniChar)); for (i = 0; i < (objc - 3); ++i) { int val; if (Tcl_GetIntFromObj(interp, objv[i + 3], &val) != TCL_OK) { @@ -1507,12 +1507,12 @@ TeststringobjCmd( unicode[i] = (Tcl_UniChar)val; } if (i < (objc-3)) { - ckfree(unicode); + Tcl_Free(unicode); return TCL_ERROR; } SetVarToObj(varPtr, varIndex, Tcl_NewUnicodeObj(unicode, objc - 3)); Tcl_SetObjResult(interp, varPtr[varIndex]); - ckfree(unicode); + Tcl_Free(unicode); break; } -- cgit v0.12 From 3203978c65afb3d5f284741440e6276f13d01e63 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Wed, 8 Mar 2023 11:02:27 +0000 Subject: Add "teststringobj newunicode". Not used in testcases yet. --- generic/tclTestObj.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/generic/tclTestObj.c b/generic/tclTestObj.c index c9a910a..66657d9 100644 --- a/generic/tclTestObj.c +++ b/generic/tclTestObj.c @@ -1269,7 +1269,7 @@ TeststringobjCmd( static const char *const options[] = { "append", "appendstrings", "get", "get2", "length", "length2", "set", "set2", "setlength", "maxchars", "range", "appendself", - "appendself2", NULL + "appendself2", "newunicode", NULL }; if (objc < 3) { @@ -1513,7 +1513,24 @@ TeststringobjCmd( Tcl_AppendUnicodeToObj(varPtr[varIndex], unicode + length, size - length); Tcl_SetObjResult(interp, varPtr[varIndex]); break; - } + case 13: /* newunicode*/ + unicode = (unsigned short *) ckalloc((objc - 3) * sizeof(unsigned short)); + for (i = 0; i < (objc - 3); ++i) { + int val; + if (Tcl_GetIntFromObj(interp, objv[i + 3], &val) != TCL_OK) { + break; + } + unicode[i] = (unsigned short)val; + } + if (i < (objc-3)) { + ckfree(unicode); + return TCL_ERROR; + } + SetVarToObj(varPtr, varIndex, Tcl_NewUnicodeObj(unicode, objc - 3)); + Tcl_SetObjResult(interp, varPtr[varIndex]); + ckfree(unicode); + break; + } return TCL_OK; } -- cgit v0.12 From 10f4d4565dc1c86e6b26623c99d5709cac033f0f Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Wed, 8 Mar 2023 15:01:20 +0000 Subject: More -Wconversion warning fixes --- generic/tclIOCmd.c | 8 +++---- generic/tclOOBasic.c | 40 ++++++++++++++++--------------- generic/tclPathObj.c | 10 ++++---- generic/tclPkg.c | 52 ++++++++++++++++++++-------------------- generic/tclPreserve.c | 8 +++---- tests/fCmd.test | 4 ++-- unix/tclSelectNotfy.c | 10 ++++---- unix/tclUnixFile.c | 20 ++++++++-------- unix/tclUnixNotfy.c | 6 ++--- unix/tclUnixThrd.c | 2 +- win/tclWinFile.c | 20 ++++++++-------- win/tclWinLoad.c | 2 +- win/tclWinNotify.c | 16 ++++++------- win/tclWinPipe.c | 20 ++++++++-------- win/tclWinSerial.c | 66 +++++++++++++++++++++++++-------------------------- win/tclWinThrd.c | 8 +++---- 16 files changed, 147 insertions(+), 145 deletions(-) diff --git a/generic/tclIOCmd.c b/generic/tclIOCmd.c index 2298d48..6ec5891 100644 --- a/generic/tclIOCmd.c +++ b/generic/tclIOCmd.c @@ -44,7 +44,7 @@ static void RegisterTcpServerInterpCleanup( Tcl_Interp *interp, AcceptCallback *acceptCallbackPtr); static Tcl_InterpDeleteProc TcpAcceptCallbacksDeleteProc; -static void TcpServerCloseProc(ClientData callbackData); +static void TcpServerCloseProc(void *callbackData); static void UnregisterTcpServerInterpCleanupProc( Tcl_Interp *interp, AcceptCallback *acceptCallbackPtr); @@ -1183,7 +1183,7 @@ Tcl_OpenObjCmd( static void TcpAcceptCallbacksDeleteProc( - ClientData clientData, /* Data which was passed when the assocdata + void *clientData, /* Data which was passed when the assocdata * was registered. */ TCL_UNUSED(Tcl_Interp *)) { @@ -1311,7 +1311,7 @@ UnregisterTcpServerInterpCleanupProc( static void AcceptCallbackProc( - ClientData callbackData, /* The data stored when the callback was + void *callbackData, /* The data stored when the callback was * created in the call to * Tcl_OpenTcpServer. */ Tcl_Channel chan, /* Channel for the newly accepted @@ -1402,7 +1402,7 @@ AcceptCallbackProc( static void TcpServerCloseProc( - ClientData callbackData) /* The data passed in the call to + void *callbackData) /* The data passed in the call to * Tcl_CreateCloseHandler. */ { AcceptCallback *acceptCallbackPtr = (AcceptCallback *)callbackData; diff --git a/generic/tclOOBasic.c b/generic/tclOOBasic.c index d8ef59b..1ad351d 100644 --- a/generic/tclOOBasic.c +++ b/generic/tclOOBasic.c @@ -52,7 +52,7 @@ AddConstructionFinalizer( static int FinalizeConstruction( - ClientData data[], + void *data[], Tcl_Interp *interp, int result) { @@ -86,11 +86,12 @@ TclOO_Class_Constructor( Object *oPtr = (Object *) Tcl_ObjectContextObject(context); Tcl_Obj **invoke, *nameObj; - if (objc-1 > (int)Tcl_ObjectContextSkippedArgs(context)) { - Tcl_WrongNumArgs(interp, Tcl_ObjectContextSkippedArgs(context), objv, + size_t skip = Tcl_ObjectContextSkippedArgs(context); + if ((size_t)objc > skip + 1) { + Tcl_WrongNumArgs(interp, skip, objv, "?definitionScript?"); return TCL_ERROR; - } else if (objc == (int)Tcl_ObjectContextSkippedArgs(context)) { + } else if ((size_t)objc == skip) { return TCL_OK; } @@ -135,7 +136,7 @@ TclOO_Class_Constructor( static int DecrRefsPostClassConstructor( - ClientData data[], + void *data[], Tcl_Interp *interp, int result) { @@ -204,7 +205,7 @@ TclOO_Class_Create( * Check we have the right number of (sensible) arguments. */ - if (objc - Tcl_ObjectContextSkippedArgs(context) < 1) { + if ((size_t)objc < 1 + Tcl_ObjectContextSkippedArgs(context)) { Tcl_WrongNumArgs(interp, Tcl_ObjectContextSkippedArgs(context), objv, "objectName ?arg ...?"); return TCL_ERROR; @@ -269,7 +270,7 @@ TclOO_Class_CreateNs( * Check we have the right number of (sensible) arguments. */ - if (objc - Tcl_ObjectContextSkippedArgs(context) < 2) { + if ((size_t)objc + 1 < Tcl_ObjectContextSkippedArgs(context) + 3) { Tcl_WrongNumArgs(interp, Tcl_ObjectContextSkippedArgs(context), objv, "objectName namespaceName ?arg ...?"); return TCL_ERROR; @@ -393,7 +394,7 @@ TclOO_Object_Destroy( static int AfterNRDestructor( - ClientData data[], + void *data[], Tcl_Interp *interp, int result) { @@ -427,12 +428,12 @@ TclOO_Object_Eval( { CallContext *contextPtr = (CallContext *) context; Tcl_Object object = Tcl_ObjectContextObject(context); - const int skip = Tcl_ObjectContextSkippedArgs(context); + size_t skip = Tcl_ObjectContextSkippedArgs(context); CallFrame *framePtr, **framePtrPtr = &framePtr; Tcl_Obj *scriptPtr; CmdFrame *invoker; - if (objc-1 < skip) { + if ((size_t)objc < skip + 1) { Tcl_WrongNumArgs(interp, skip, objv, "arg ?arg ...?"); return TCL_ERROR; } @@ -460,7 +461,7 @@ TclOO_Object_Eval( * object when it decrements its refcount after eval'ing it. */ - if (objc != skip+1) { + if ((size_t)objc != skip+1) { scriptPtr = Tcl_ConcatObj(objc-skip, objv+skip); invoker = NULL; } else { @@ -479,7 +480,7 @@ TclOO_Object_Eval( static int FinalizeEval( - ClientData data[], + void *data[], Tcl_Interp *interp, int result) { @@ -531,7 +532,8 @@ TclOO_Object_Unknown( Class *callerCls = NULL; Object *oPtr = contextPtr->oPtr; const char **methodNames; - int numMethodNames, i, skip = Tcl_ObjectContextSkippedArgs(context); + int numMethodNames, i; + size_t skip = Tcl_ObjectContextSkippedArgs(context); CallFrame *framePtr = ((Interp *) interp)->varFramePtr; Tcl_Obj *errorMsg; @@ -541,7 +543,7 @@ TclOO_Object_Unknown( * name without an error). */ - if (objc < skip+1) { + if ((size_t)objc < skip+1) { Tcl_WrongNumArgs(interp, skip, objv, "method ?arg ...?"); return TCL_ERROR; } @@ -635,7 +637,7 @@ TclOO_Object_LinkVar( Interp *iPtr = (Interp *) interp; Tcl_Object object = Tcl_ObjectContextObject(context); Namespace *savedNsPtr; - int i; + size_t i; if ((size_t)objc < Tcl_ObjectContextSkippedArgs(context)) { Tcl_WrongNumArgs(interp, Tcl_ObjectContextSkippedArgs(context), objv, @@ -653,7 +655,7 @@ TclOO_Object_LinkVar( return TCL_OK; } - for (i=Tcl_ObjectContextSkippedArgs(context) ; ivarFramePtr = (CallFrame *)data[0]; if (contextPtr != NULL) { - contextPtr->index = PTR2INT(data[2]); + contextPtr->index = PTR2UINT(data[2]); } return result; } @@ -1090,7 +1092,7 @@ TclOOSelfObjCmd( return TCL_OK; case SELF_NS: Tcl_SetObjResult(interp, Tcl_NewStringObj( - contextPtr->oPtr->namespacePtr->fullName,-1)); + contextPtr->oPtr->namespacePtr->fullName, TCL_INDEX_NONE)); return TCL_OK; case SELF_CLASS: { Class *clsPtr = CurrentlyInvoked(contextPtr).mPtr->declaringClassPtr; diff --git a/generic/tclPathObj.c b/generic/tclPathObj.c index 19c1b9d..b14fd8a 100644 --- a/generic/tclPathObj.c +++ b/generic/tclPathObj.c @@ -65,7 +65,7 @@ typedef struct { * normPathPtr exists and is absolute. */ int flags; /* Flags to describe interpretation - see * below. */ - ClientData nativePathPtr; /* Native representation of this path, which + void *nativePathPtr; /* Native representation of this path, which * is filesystem dependent. */ size_t filesystemEpoch; /* Used to ensure the path representation was * generated during the correct filesystem @@ -1489,7 +1489,7 @@ MakePathFromNormalized( Tcl_Obj * Tcl_FSNewNativePath( const Tcl_Filesystem *fromFilesystem, - ClientData clientData) + void *clientData) { Tcl_Obj *pathPtr = NULL; FsPath *fsPathPtr; @@ -1927,7 +1927,7 @@ Tcl_FSGetNormalizedPath( *--------------------------------------------------------------------------- */ -ClientData +void * Tcl_FSGetInternalRep( Tcl_Obj *pathPtr, const Tcl_Filesystem *fsPtr) @@ -2074,7 +2074,7 @@ void TclFSSetPathDetails( Tcl_Obj *pathPtr, const Tcl_Filesystem *fsPtr, - ClientData clientData) + void *clientData) { FsPath *srcFsPathPtr; @@ -2368,7 +2368,7 @@ UpdateStringOfFsPath( int TclNativePathInFilesystem( Tcl_Obj *pathPtr, - TCL_UNUSED(ClientData *)) + TCL_UNUSED(void **)) { /* * A special case is required to handle the empty path "". This is a valid diff --git a/generic/tclPkg.c b/generic/tclPkg.c index 132a219..989f133 100644 --- a/generic/tclPkg.c +++ b/generic/tclPkg.c @@ -96,15 +96,15 @@ static void AddRequirementsToResult(Tcl_Interp *interp, int reqc, static void AddRequirementsToDString(Tcl_DString *dstring, int reqc, Tcl_Obj *const reqv[]); static Package * FindPackage(Tcl_Interp *interp, const char *name); -static int PkgRequireCore(ClientData data[], Tcl_Interp *interp, int result); -static int PkgRequireCoreFinal(ClientData data[], Tcl_Interp *interp, int result); -static int PkgRequireCoreCleanup(ClientData data[], Tcl_Interp *interp, int result); -static int PkgRequireCoreStep1(ClientData data[], Tcl_Interp *interp, int result); -static int PkgRequireCoreStep2(ClientData data[], Tcl_Interp *interp, int result); -static int TclNRPkgRequireProc(ClientData clientData, Tcl_Interp *interp, int reqc, Tcl_Obj *const reqv[]); -static int SelectPackage(ClientData data[], Tcl_Interp *interp, int result); -static int SelectPackageFinal(ClientData data[], Tcl_Interp *interp, int result); -static int TclNRPackageObjCmdCleanup(ClientData data[], Tcl_Interp *interp, int result); +static int PkgRequireCore(void *data[], Tcl_Interp *interp, int result); +static int PkgRequireCoreFinal(void *data[], Tcl_Interp *interp, int result); +static int PkgRequireCoreCleanup(void *data[], Tcl_Interp *interp, int result); +static int PkgRequireCoreStep1(void *data[], Tcl_Interp *interp, int result); +static int PkgRequireCoreStep2(void *data[], Tcl_Interp *interp, int result); +static int TclNRPkgRequireProc(void *clientData, Tcl_Interp *interp, int reqc, Tcl_Obj *const reqv[]); +static int SelectPackage(void *data[], Tcl_Interp *interp, int result); +static int SelectPackageFinal(void *data[], Tcl_Interp *interp, int result); +static int TclNRPackageObjCmdCleanup(void *data[], Tcl_Interp *interp, int result); /* * Helper macros. @@ -225,7 +225,7 @@ Tcl_PkgProvideEx( static void PkgFilesCleanupProc( - ClientData clientData, + void *clientData, TCL_UNUSED(Tcl_Interp *)) { PkgFiles *pkgFiles = (PkgFiles *) clientData; @@ -442,7 +442,7 @@ Tcl_PkgRequireProc( static int TclNRPkgRequireProc( - ClientData clientData, + void *clientData, Tcl_Interp *interp, int reqc, Tcl_Obj *const reqv[]) @@ -457,12 +457,12 @@ TclNRPkgRequireProc( static int PkgRequireCore( - ClientData data[], + void *data[], Tcl_Interp *interp, TCL_UNUSED(int)) { const char *name = (const char *)data[0]; - int reqc = PTR2INT(data[1]); + int reqc = (int)PTR2INT(data[1]); Tcl_Obj **reqv = (Tcl_Obj **)data[2]; int code = CheckAllRequirements(interp, reqc, reqv); Require *reqPtr; @@ -488,14 +488,14 @@ PkgRequireCore( static int PkgRequireCoreStep1( - ClientData data[], + void *data[], Tcl_Interp *interp, TCL_UNUSED(int)) { Tcl_DString command; char *script; Require *reqPtr = (Require *)data[0]; - int reqc = PTR2INT(data[1]); + int reqc = (int)PTR2INT(data[1]); Tcl_Obj **const reqv = (Tcl_Obj **)data[2]; const char *name = reqPtr->name /* Name of desired package. */; @@ -547,12 +547,12 @@ PkgRequireCoreStep1( static int PkgRequireCoreStep2( - ClientData data[], + void *data[], Tcl_Interp *interp, int result) { Require *reqPtr = (Require *)data[0]; - int reqc = PTR2INT(data[1]); + int reqc = (int)PTR2INT(data[1]); Tcl_Obj **const reqv = (Tcl_Obj **)data[2]; const char *name = reqPtr->name; /* Name of desired package. */ @@ -582,12 +582,12 @@ PkgRequireCoreStep2( static int PkgRequireCoreFinal( - ClientData data[], + void *data[], Tcl_Interp *interp, TCL_UNUSED(int)) { Require *reqPtr = (Require *)data[0]; - int reqc = PTR2INT(data[1]), satisfies; + int reqc = (int)PTR2INT(data[1]), satisfies; Tcl_Obj **const reqv = (Tcl_Obj **)data[2]; char *pkgVersionI; void *clientDataPtr = reqPtr->clientDataPtr; @@ -634,7 +634,7 @@ PkgRequireCoreFinal( static int PkgRequireCoreCleanup( - ClientData data[], + void *data[], TCL_UNUSED(Tcl_Interp *), int result) { @@ -644,7 +644,7 @@ PkgRequireCoreCleanup( static int SelectPackage( - ClientData data[], + void *data[], Tcl_Interp *interp, TCL_UNUSED(int)) { @@ -653,7 +653,7 @@ SelectPackage( /* Internal rep. of versions */ int availStable, satisfies; Require *reqPtr = (Require *)data[0]; - int reqc = PTR2INT(data[1]); + int reqc = (int)PTR2INT(data[1]); Tcl_Obj **const reqv = (Tcl_Obj **)data[2]; const char *name = reqPtr->name; Package *pkgPtr = reqPtr->pkgPtr; @@ -847,12 +847,12 @@ SelectPackage( static int SelectPackageFinal( - ClientData data[], + void *data[], Tcl_Interp *interp, int result) { Require *reqPtr = (Require *)data[0]; - int reqc = PTR2INT(data[1]); + int reqc = (int)PTR2INT(data[1]); Tcl_Obj **const reqv = (Tcl_Obj **)data[2]; const char *name = reqPtr->name; char *versionToProvide = reqPtr->versionToProvide; @@ -1053,7 +1053,7 @@ Tcl_PkgPresentEx( */ int Tcl_PackageObjCmd( - ClientData clientData, + void *clientData, Tcl_Interp *interp, /* Current interpreter. */ int objc, /* Number of arguments. */ Tcl_Obj *const objv[]) /* Argument objects. */ @@ -1539,7 +1539,7 @@ TclNRPackageObjCmd( static int TclNRPackageObjCmdCleanup( - ClientData data[], + void *data[], TCL_UNUSED(Tcl_Interp *), int result) { diff --git a/generic/tclPreserve.c b/generic/tclPreserve.c index 5bc0a1a..ff4b45b 100644 --- a/generic/tclPreserve.c +++ b/generic/tclPreserve.c @@ -21,7 +21,7 @@ */ typedef struct { - ClientData clientData; /* Address of preserved block. */ + void *clientData; /* Address of preserved block. */ size_t refCount; /* Number of Tcl_Preserve calls in effect for * block. */ int mustFree; /* Non-zero means Tcl_EventuallyFree was @@ -117,7 +117,7 @@ TclFinalizePreserve(void) void Tcl_Preserve( - ClientData clientData) /* Pointer to malloc'ed block of memory. */ + void *clientData) /* Pointer to malloc'ed block of memory. */ { Reference *refPtr; size_t i; @@ -180,7 +180,7 @@ Tcl_Preserve( void Tcl_Release( - ClientData clientData) /* Pointer to malloc'ed block of memory. */ + void *clientData) /* Pointer to malloc'ed block of memory. */ { Reference *refPtr; size_t i; @@ -259,7 +259,7 @@ Tcl_Release( void Tcl_EventuallyFree( - ClientData clientData, /* Pointer to malloc'ed block of memory. */ + void *clientData, /* Pointer to malloc'ed block of memory. */ Tcl_FreeProc *freeProc) /* Function to actually do free. */ { Reference *refPtr; diff --git a/tests/fCmd.test b/tests/fCmd.test index d60e58c..dcfe270 100644 --- a/tests/fCmd.test +++ b/tests/fCmd.test @@ -136,7 +136,7 @@ proc gethomedirglob {user} { set sid [string trim $sid] # Get path from the Windows registry set home [registry get "HKEY_LOCAL_MACHINE\\Software\\Microsoft\\Windows NT\\CurrentVersion\\ProfileList\\$sid" ProfileImagePath] - set home [string trim $home] + set home [string trim [string tolower $home]] } result]} { if {$home ne ""} { # file join for \ -> / @@ -147,7 +147,7 @@ proc gethomedirglob {user} { # Caller will need to use glob matching and hope user # name is in the home directory path - return *$user* + return *[string tolower $user]* } proc createfile {file {string a}} { diff --git a/unix/tclSelectNotfy.c b/unix/tclSelectNotfy.c index 7d14c26..feabfa8 100644 --- a/unix/tclSelectNotfy.c +++ b/unix/tclSelectNotfy.c @@ -32,7 +32,7 @@ typedef struct FileHandler { * for this file. */ Tcl_FileProc *proc; /* Function to call, in the style of * Tcl_CreateFileHandler. */ - ClientData clientData; /* Argument to pass to proc. */ + void *clientData; /* Argument to pass to proc. */ struct FileHandler *nextPtr;/* Next in list of all files we care about. */ } FileHandler; @@ -214,7 +214,7 @@ static sigset_t allSigMask; */ #if TCL_THREADS -static TCL_NORETURN void NotifierThreadProc(ClientData clientData); +static TCL_NORETURN void NotifierThreadProc(void *clientData); #if defined(HAVE_PTHREAD_ATFORK) static int atForkInit = 0; static void AtForkChild(void); @@ -313,7 +313,7 @@ static unsigned int __stdcall NotifierProc(void *hwnd, unsigned int message, *---------------------------------------------------------------------- */ -ClientData +void * TclpInitNotifier(void) { ThreadSpecificData *tsdPtr = TCL_TSD_INIT(&dataKey); @@ -480,7 +480,7 @@ TclpCreateFileHandler( * called. */ Tcl_FileProc *proc, /* Function to call for each selected * event. */ - ClientData clientData) /* Arbitrary data to pass to proc. */ + void *clientData) /* Arbitrary data to pass to proc. */ { ThreadSpecificData *tsdPtr = TCL_TSD_INIT(&dataKey); FileHandler *filePtr = LookUpFileHandler(tsdPtr, fd, NULL); @@ -1179,7 +1179,7 @@ NotifierThreadProc( */ do { - i = read(receivePipe, buf, 1); + i = (int)read(receivePipe, buf, 1); if (i <= 0) { break; } else if ((i == 0) || ((i == 1) && (buf[0] == 'q'))) { diff --git a/unix/tclUnixFile.c b/unix/tclUnixFile.c index 830ed6f..673aa72 100644 --- a/unix/tclUnixFile.c +++ b/unix/tclUnixFile.c @@ -709,9 +709,9 @@ TclpObjLstat( *---------------------------------------------------------------------- */ -ClientData +void * TclpGetNativeCwd( - ClientData clientData) + void *clientData) { char buffer[MAXPATHLEN+1]; @@ -813,7 +813,7 @@ TclpReadlink( { #ifndef DJGPP char link[MAXPATHLEN]; - int length; + ssize_t length; const char *native; Tcl_DString ds; @@ -825,7 +825,7 @@ TclpReadlink( return NULL; } - Tcl_ExternalToUtfDStringEx(NULL, link, length, TCL_ENCODING_NOCOMPLAIN, linkPtr); + Tcl_ExternalToUtfDStringEx(NULL, link, (size_t)length, TCL_ENCODING_NOCOMPLAIN, linkPtr); return Tcl_DStringValue(linkPtr); #else return NULL; @@ -979,7 +979,7 @@ TclpObjLink( Tcl_Obj *linkPtr = NULL; char link[MAXPATHLEN]; - int length; + ssize_t length; Tcl_DString ds; Tcl_Obj *transPtr; @@ -994,7 +994,7 @@ TclpObjLink( return NULL; } - Tcl_ExternalToUtfDStringEx(NULL, link, length, TCL_ENCODING_NOCOMPLAIN, &ds); + Tcl_ExternalToUtfDStringEx(NULL, link, (size_t)length, TCL_ENCODING_NOCOMPLAIN, &ds); linkPtr = Tcl_DStringToObj(&ds); Tcl_IncrRefCount(linkPtr); return linkPtr; @@ -1055,7 +1055,7 @@ TclpFilesystemPathType( Tcl_Obj * TclpNativeToNormalized( - ClientData clientData) + void *clientData) { Tcl_DString ds; @@ -1079,7 +1079,7 @@ TclpNativeToNormalized( *--------------------------------------------------------------------------- */ -ClientData +void * TclNativeCreateNativeRep( Tcl_Obj *pathPtr) { @@ -1146,9 +1146,9 @@ TclNativeCreateNativeRep( *--------------------------------------------------------------------------- */ -ClientData +void * TclNativeDupInternalRep( - ClientData clientData) + void *clientData) { char *copy; size_t len; diff --git a/unix/tclUnixNotfy.c b/unix/tclUnixNotfy.c index 943e7d7..6ecde5d 100644 --- a/unix/tclUnixNotfy.c +++ b/unix/tclUnixNotfy.c @@ -27,7 +27,7 @@ static int FileHandlerEventProc(Tcl_Event *evPtr, int flags); # define NOTIFIER_SELECT #elif !defined(NOTIFIER_EPOLL) && !defined(NOTIFIER_KQUEUE) # define NOTIFIER_SELECT -static TCL_NORETURN void NotifierThreadProc(ClientData clientData); +static TCL_NORETURN void NotifierThreadProc(void *clientData); # if defined(HAVE_PTHREAD_ATFORK) static void AtForkChild(void); # endif /* HAVE_PTHREAD_ATFORK */ @@ -497,13 +497,13 @@ AtForkChild(void) *---------------------------------------------------------------------- */ -ClientData +void * TclpNotifierData(void) { #if defined(NOTIFIER_EPOLL) || defined(NOTIFIER_KQUEUE) ThreadSpecificData *tsdPtr = TCL_TSD_INIT(&dataKey); - return (ClientData) tsdPtr; + return (void *) tsdPtr; #else return NULL; #endif diff --git a/unix/tclUnixThrd.c b/unix/tclUnixThrd.c index 36f0648..cf3b7a1 100644 --- a/unix/tclUnixThrd.c +++ b/unix/tclUnixThrd.c @@ -213,7 +213,7 @@ int TclpThreadCreate( Tcl_ThreadId *idPtr, /* Return, the ID of the thread */ Tcl_ThreadCreateProc *proc, /* Main() function of the thread */ - ClientData clientData, /* The one argument to Main() */ + void *clientData, /* The one argument to Main() */ size_t stackSize, /* Size of stack for the new thread */ int flags) /* Flags controlling behaviour of the new * thread. */ diff --git a/win/tclWinFile.c b/win/tclWinFile.c index 4c63222..b16a707 100644 --- a/win/tclWinFile.c +++ b/win/tclWinFile.c @@ -170,7 +170,7 @@ static int NativeWriteReparse(const WCHAR *LinkDirectory, static int NativeMatchType(int isDrive, DWORD attr, const WCHAR *nativeName, Tcl_GlobTypeData *types); static int WinIsDrive(const char *name, size_t nameLen); -static Tcl_Size WinIsReserved(const char *path); +static size_t WinIsReserved(const char *path); static Tcl_Obj * WinReadLink(const WCHAR *LinkSource); static Tcl_Obj * WinReadLinkDirectory(const WCHAR *LinkDirectory); static int WinLink(const WCHAR *LinkSource, @@ -921,7 +921,7 @@ TclpMatchInDirectory( DWORD attr; WIN32_FILE_ATTRIBUTE_DATA data; - Tcl_Size len = 0; + size_t len = 0; const char *str = Tcl_GetStringFromObj(norm, &len); native = (const WCHAR *)Tcl_FSGetNativePath(pathPtr); @@ -943,7 +943,7 @@ TclpMatchInDirectory( WIN32_FIND_DATAW data; const char *dirName; /* UTF-8 dir name, later with pattern * appended. */ - Tcl_Size dirLength; + size_t dirLength; int matchSpecialDots; Tcl_DString ds; /* Native encoding of dir, also used * temporarily for other things. */ @@ -1226,7 +1226,7 @@ WinIsDrive( * (not any trailing :). */ -static Tcl_Size +static size_t WinIsReserved( const char *path) /* Path in UTF-8 */ { @@ -2560,14 +2560,14 @@ TclpObjNormalizePath( */ if (isDrive) { - Tcl_Size len = WinIsReserved(path); + size_t len = WinIsReserved(path); if (len > 0) { /* * Actually it does exist - COM1, etc. */ - Tcl_Size i; + size_t i; for (i=0 ; iclientData = (ClientData) hInstance; + handlePtr->clientData = (void *)hInstance; handlePtr->findSymbolProcPtr = &FindSymbol; handlePtr->unloadFileProcPtr = &UnloadFile; *loadHandle = handlePtr; diff --git a/win/tclWinNotify.c b/win/tclWinNotify.c index ec6fd51..bcb4e08 100644 --- a/win/tclWinNotify.c +++ b/win/tclWinNotify.c @@ -76,7 +76,7 @@ static LRESULT CALLBACK NotifierProc(HWND hwnd, UINT message, *---------------------------------------------------------------------- */ -ClientData +void * TclpInitNotifier(void) { ThreadSpecificData *tsdPtr = TCL_TSD_INIT(&dataKey); @@ -148,7 +148,7 @@ TclpInitNotifier(void) void TclpFinalizeNotifier( - ClientData clientData) /* Pointer to notifier data. */ + void *clientData) /* Pointer to notifier data. */ { ThreadSpecificData *tsdPtr = (ThreadSpecificData *) clientData; @@ -218,7 +218,7 @@ TclpFinalizeNotifier( void TclpAlertNotifier( - ClientData clientData) /* Pointer to thread data. */ + void *clientData) /* Pointer to thread data. */ { ThreadSpecificData *tsdPtr = (ThreadSpecificData *) clientData; @@ -287,7 +287,7 @@ TclpSetTimer( * Windows seems to get confused by zero length timers. */ - timeout = timePtr->sec * 1000 + timePtr->usec / 1000; + timeout = (UINT)timePtr->sec * 1000 + (unsigned long)timePtr->usec / 1000; if (timeout == 0) { timeout = 1; } @@ -437,7 +437,7 @@ NotifierProc( *---------------------------------------------------------------------- */ -ClientData +void * TclpNotifierData(void) { return NULL; @@ -490,7 +490,7 @@ TclpWaitForEvent( TclScaleTime(&myTime); } - timeout = myTime.sec * 1000 + myTime.usec / 1000; + timeout = (DWORD)myTime.sec * 1000 + (unsigned long)myTime.usec / 1000; } else { timeout = INFINITE; } @@ -610,7 +610,7 @@ Tcl_Sleep( */ TclScaleTime(&vdelay); - sleepTime = vdelay.sec * 1000 + vdelay.usec / 1000; + sleepTime = (DWORD)vdelay.sec * 1000 + (unsigned long)vdelay.usec / 1000; for (;;) { SleepEx(sleepTime, TRUE); @@ -625,7 +625,7 @@ Tcl_Sleep( vdelay.usec = desired.usec - now.usec; TclScaleTime(&vdelay); - sleepTime = vdelay.sec * 1000 + vdelay.usec / 1000; + sleepTime = (DWORD)vdelay.sec * 1000 + (unsigned long)vdelay.usec / 1000; } } diff --git a/win/tclWinPipe.c b/win/tclWinPipe.c index b7949d1..84e6ab0 100644 --- a/win/tclWinPipe.c +++ b/win/tclWinPipe.c @@ -104,7 +104,7 @@ typedef struct PipeInfo { TclFile readFile; /* Output from pipe. */ TclFile writeFile; /* Input from pipe. */ TclFile errorFile; /* Error output from pipe. */ - Tcl_Size numPids; /* Number of processes attached to pipe. */ + size_t numPids; /* Number of processes attached to pipe. */ Tcl_Pid *pidPtr; /* Pids of attached processes. */ Tcl_ThreadId threadId; /* Thread to which events should be reported. * This value is used by the reader/writer @@ -171,7 +171,7 @@ typedef struct { static int ApplicationType(Tcl_Interp *interp, const char *fileName, char *fullName); -static void BuildCommandLine(const char *executable, Tcl_Size argc, +static void BuildCommandLine(const char *executable, size_t argc, const char **argv, Tcl_DString *linePtr); static BOOL HasConsole(void); static int PipeBlockModeProc(void *instanceData, int mode); @@ -859,7 +859,7 @@ TclpCloseFile( *-------------------------------------------------------------------------- */ -Tcl_Size +size_t TclpGetPid( Tcl_Pid pid) /* The HANDLE of the child process. */ { @@ -911,7 +911,7 @@ TclpCreateProcess( * occurred when creating the child process. * Error messages from the child process * itself are sent to errorFile. */ - Tcl_Size argc, /* Number of arguments in following array. */ + size_t argc, /* Number of arguments in following array. */ const char **argv, /* Array of argument strings. argv[0] contains * the name of the executable converted to * native format (using the @@ -1536,14 +1536,14 @@ static void BuildCommandLine( const char *executable, /* Full path of executable (including * extension). Replacement for argv[0]. */ - Tcl_Size argc, /* Number of arguments. */ + size_t argc, /* Number of arguments. */ const char **argv, /* Argument strings in UTF. */ Tcl_DString *linePtr) /* Initialized Tcl_DString that receives the * command line (WCHAR). */ { const char *arg, *start, *special, *bspos; int quote = 0; - Tcl_Size i; + size_t i; Tcl_DString ds; static const char specMetaChars[] = "&|^<>!()%"; /* Characters to enclose in quotes if unpaired @@ -1760,7 +1760,7 @@ TclpCreateCommandChannel( TclFile writeFile, /* If non-null, gives the file for writing. */ TclFile errorFile, /* If non-null, gives the file where errors * can be read. */ - Tcl_Size numPids, /* The number of pids in the pid array. */ + size_t numPids, /* The number of pids in the pid array. */ Tcl_Pid *pidPtr) /* An array of process identifiers. */ { char channelName[16 + TCL_INTEGER_SPACE]; @@ -1900,7 +1900,7 @@ TclGetAndDetachPids( PipeInfo *pipePtr; const Tcl_ChannelType *chanTypePtr; Tcl_Obj *pidsObj; - Tcl_Size i; + size_t i; /* * Punt if the channel is not a command channel. @@ -2744,7 +2744,7 @@ Tcl_PidObjCmd( Tcl_Channel chan; const Tcl_ChannelType *chanTypePtr; PipeInfo *pipePtr; - Tcl_Size i; + size_t i; Tcl_Obj *resultPtr; if (objc > 2) { @@ -3191,7 +3191,7 @@ TclpOpenTemporaryFile( char *namePtr; HANDLE handle; DWORD flags = FILE_ATTRIBUTE_TEMPORARY; - Tcl_Size length; + size_t length; int counter, counter2; Tcl_DString buf; diff --git a/win/tclWinSerial.c b/win/tclWinSerial.c index 3db36d5..78b47b9 100644 --- a/win/tclWinSerial.c +++ b/win/tclWinSerial.c @@ -85,7 +85,7 @@ typedef struct SerialInfo { int readable; /* Flag that the channel is readable. */ int writable; /* Flag that the channel is writable. */ int blockTime; /* Maximum blocktime in msec. */ - unsigned int lastEventTime; /* Time in milliseconds since last readable + unsigned long long lastEventTime; /* Time in milliseconds since last readable * event. */ /* Next readable event only after blockTime */ DWORD error; /* pending error code returned by @@ -165,30 +165,30 @@ static COMMTIMEOUTS no_timeout = { * Declarations for functions used only in this file. */ -static int SerialBlockProc(ClientData instanceData, int mode); -static void SerialCheckProc(ClientData clientData, int flags); -static int SerialCloseProc(ClientData instanceData, +static int SerialBlockProc(void *instanceData, int mode); +static void SerialCheckProc(void *clientData, int flags); +static int SerialCloseProc(void *instanceData, Tcl_Interp *interp, int flags); static int SerialEventProc(Tcl_Event *evPtr, int flags); -static void SerialExitHandler(ClientData clientData); -static int SerialGetHandleProc(ClientData instanceData, - int direction, ClientData *handlePtr); +static void SerialExitHandler(void *clientData); +static int SerialGetHandleProc(void *instanceData, + int direction, void **handlePtr); static ThreadSpecificData *SerialInit(void); -static int SerialInputProc(ClientData instanceData, char *buf, +static int SerialInputProc(void *instanceData, char *buf, int toRead, int *errorCode); -static int SerialOutputProc(ClientData instanceData, +static int SerialOutputProc(void *instanceData, const char *buf, int toWrite, int *errorCode); -static void SerialSetupProc(ClientData clientData, int flags); -static void SerialWatchProc(ClientData instanceData, int mask); -static void ProcExitHandler(ClientData clientData); -static int SerialGetOptionProc(ClientData instanceData, +static void SerialSetupProc(void *clientData, int flags); +static void SerialWatchProc(void *instanceData, int mask); +static void ProcExitHandler(void *clientData); +static int SerialGetOptionProc(void *instanceData, Tcl_Interp *interp, const char *optionName, Tcl_DString *dsPtr); -static int SerialSetOptionProc(ClientData instanceData, +static int SerialSetOptionProc(void *instanceData, Tcl_Interp *interp, const char *optionName, const char *value); static DWORD WINAPI SerialWriterThread(LPVOID arg); -static void SerialThreadActionProc(ClientData instanceData, +static void SerialThreadActionProc(void *instanceData, int action); static int SerialBlockingRead(SerialInfo *infoPtr, LPVOID buf, DWORD bufSize, LPDWORD lpRead, LPOVERLAPPED osPtr); @@ -373,14 +373,14 @@ SerialBlockTime( *---------------------------------------------------------------------- */ -static unsigned int +static unsigned long long SerialGetMilliseconds(void) { Tcl_Time time; Tcl_GetTime(&time); - return (time.sec * 1000 + time.usec / 1000); + return ((unsigned long long)time.sec * 1000 + (unsigned long)time.usec / 1000); } /* @@ -469,7 +469,7 @@ SerialCheckProc( int needEvent; ThreadSpecificData *tsdPtr = TCL_TSD_INIT(&dataKey); COMSTAT cStat; - unsigned int time; + unsigned long long time; if (!(flags & TCL_FILE_EVENTS)) { return; @@ -519,8 +519,8 @@ SerialCheckProc( (infoPtr->error & SERIAL_READ_ERRORS)) { infoPtr->readable = 1; time = SerialGetMilliseconds(); - if ((unsigned int) (time - infoPtr->lastEventTime) - >= (unsigned int) infoPtr->blockTime) { + if ((time - infoPtr->lastEventTime) + >= (unsigned long long) infoPtr->blockTime) { needEvent = 1; infoPtr->lastEventTime = time; } @@ -561,7 +561,7 @@ SerialCheckProc( static int SerialBlockProc( - ClientData instanceData, /* Instance data for channel. */ + void *instanceData, /* Instance data for channel. */ int mode) /* TCL_MODE_BLOCKING or * TCL_MODE_NONBLOCKING. */ { @@ -600,7 +600,7 @@ SerialBlockProc( static int SerialCloseProc( - ClientData instanceData, /* Pointer to SerialInfo structure. */ + void *instanceData, /* Pointer to SerialInfo structure. */ TCL_UNUSED(Tcl_Interp *), int flags) { @@ -796,7 +796,7 @@ SerialBlockingWrite( LeaveCriticalSection(&infoPtr->csWrite); if (result == FALSE) { - int err = GetLastError(); + DWORD err = GetLastError(); switch (err) { case ERROR_IO_PENDING: @@ -855,7 +855,7 @@ SerialBlockingWrite( static int SerialInputProc( - ClientData instanceData, /* Serial state. */ + void *instanceData, /* Serial state. */ char *buf, /* Where to store data read. */ int bufSize, /* How much space is available in the * buffer? */ @@ -918,7 +918,7 @@ SerialInputProc( } if (bufSize == 0) { - return bytesRead = 0; + return 0; } /* @@ -962,7 +962,7 @@ SerialInputProc( static int SerialOutputProc( - ClientData instanceData, /* Serial state. */ + void *instanceData, /* Serial state. */ const char *buf, /* The data buffer. */ int toWrite, /* How many bytes to write? */ int *errorCode) /* Where to store error code. */ @@ -1192,7 +1192,7 @@ SerialEventProc( static void SerialWatchProc( - ClientData instanceData, /* Serial state. */ + void *instanceData, /* Serial state. */ int mask) /* What events to watch for, OR-ed combination * of TCL_READABLE, TCL_WRITABLE and * TCL_EXCEPTION. */ @@ -1249,13 +1249,13 @@ SerialWatchProc( static int SerialGetHandleProc( - ClientData instanceData, /* The serial state. */ + void *instanceData, /* The serial state. */ TCL_UNUSED(int) /*direction*/, - ClientData *handlePtr) /* Where to store the handle. */ + void **handlePtr) /* Where to store the handle. */ { SerialInfo *infoPtr = (SerialInfo *) instanceData; - *handlePtr = (ClientData) infoPtr->handle; + *handlePtr = (void *) infoPtr->handle; return TCL_OK; } @@ -1613,7 +1613,7 @@ SerialModemStatusStr( static int SerialSetOptionProc( - ClientData instanceData, /* File state. */ + void *instanceData, /* File state. */ Tcl_Interp *interp, /* For error reporting - can be NULL. */ const char *optionName, /* Which option to set? */ const char *value) /* New value for option. */ @@ -2037,7 +2037,7 @@ SerialSetOptionProc( static int SerialGetOptionProc( - ClientData instanceData, /* File state. */ + void *instanceData, /* File state. */ Tcl_Interp *interp, /* For error reporting - can be NULL. */ const char *optionName, /* Option to get. */ Tcl_DString *dsPtr) /* Where to store value(s). */ @@ -2274,7 +2274,7 @@ SerialGetOptionProc( static void SerialThreadActionProc( - ClientData instanceData, + void *instanceData, int action) { SerialInfo *infoPtr = (SerialInfo *) instanceData; diff --git a/win/tclWinThrd.c b/win/tclWinThrd.c index 841a854..0195895 100644 --- a/win/tclWinThrd.c +++ b/win/tclWinThrd.c @@ -203,7 +203,7 @@ int TclpThreadCreate( Tcl_ThreadId *idPtr, /* Return, the ID of the thread. */ Tcl_ThreadCreateProc *proc, /* Main() function of the thread. */ - ClientData clientData, /* The one argument to Main(). */ + void *clientData, /* The one argument to Main(). */ size_t stackSize, /* Size of stack for the new thread. */ int flags) /* Flags controlling behaviour of the new * thread. */ @@ -535,7 +535,7 @@ TclFinalizeLock(void) #if TCL_THREADS /* locally used prototype */ -static void FinalizeConditionEvent(ClientData data); +static void FinalizeConditionEvent(void *data); /* *---------------------------------------------------------------------- @@ -725,7 +725,7 @@ Tcl_ConditionWait( if (timePtr == NULL) { wtime = INFINITE; } else { - wtime = timePtr->sec * 1000 + timePtr->usec / 1000; + wtime = (DWORD)timePtr->sec * 1000 + (unsigned long)timePtr->usec / 1000; } /* @@ -880,7 +880,7 @@ Tcl_ConditionNotify( static void FinalizeConditionEvent( - ClientData data) + void *data) { ThreadSpecificData *tsdPtr = (ThreadSpecificData *) data; -- cgit v0.12 From 7ed7017d94b407f12d57a464cd46a4bf1f2f976b Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Wed, 8 Mar 2023 19:58:08 +0000 Subject: Add "notWsl" test constraints. Clean up many testcases --- tests/chanio.test | 285 +++++++++++++++++++++++++++------------------------- tests/cmdAH.test | 17 ++-- tests/fCmd.test | 105 ++++++++++--------- tests/tcltest.test | 15 +-- tests/unixFCmd.test | 12 ++- win/tclWinTest.c | 31 +++--- 6 files changed, 241 insertions(+), 224 deletions(-) diff --git a/tests/chanio.test b/tests/chanio.test index 1c689fb..81c31d8 100644 --- a/tests/chanio.test +++ b/tests/chanio.test @@ -8,7 +8,7 @@ # # Copyright (c) 1991-1994 The Regents of the University of California. # Copyright (c) 1994-1997 Sun Microsystems, Inc. -# Copyright (c) 1998-1999 by Scriptics Corporation. +# Copyright (c) 1998-1999 Scriptics Corporation. # # See the file "license.terms" for information on usage and redistribution of # this file, and for a DISCLAIMER OF ALL WARRANTIES. @@ -45,6 +45,8 @@ namespace eval ::tcl::test::io { testConstraint notWinCI [expr { $::tcl_platform(platform) ne "windows" || ![info exists ::env(CI)]}] testConstraint notOSX [expr {$::tcl_platform(os) ne "Darwin"}] + # File permissions broken on wsl without some "exotic" wsl configuration + testConstraint notWsl [expr {[llength [array names ::env *WSL*]] == 0}] # You need a *very* special environment to do some tests. In particular, # many file systems do not support large-files... @@ -74,7 +76,7 @@ namespace eval ::tcl::test::io { if {$argv != ""} { set f [open [lindex $argv 0]] } - chan configure $f -encoding binary -translation lf -blocking 0 -eofchar \x1a + chan configure $f -encoding binary -translation lf -blocking 0 -eofchar \x1A chan configure stdout -encoding binary -translation lf -buffering none chan event $f readable "foo $f" proc foo {f} { @@ -110,17 +112,17 @@ set path(test1) [makeFile {} test1] test chan-io-1.6 {Tcl_WriteChars: WriteBytes} { set f [open $path(test1) w] chan configure $f -encoding binary - chan puts -nonewline $f "a\u4e4d\0" + chan puts -nonewline $f a\u4E4D\x00 chan close $f contents $path(test1) -} "a\x4d\x00" +} aM\x00 test chan-io-1.7 {Tcl_WriteChars: WriteChars} { set f [open $path(test1) w] chan configure $f -encoding shiftjis - chan puts -nonewline $f "a\u4e4d\0" + chan puts -nonewline $f "a\u4E4D\0" chan close $f contents $path(test1) -} "a\x93\xe1\x00" +} "a\x93\xE1\x00" set path(test2) [makeFile {} test2] test chan-io-1.8 {Tcl_WriteChars: WriteChars} { # This test written for SF bug #506297. @@ -133,7 +135,7 @@ test chan-io-1.8 {Tcl_WriteChars: WriteChars} { chan puts -nonewline $f [format %s%c [string repeat " " 4] 12399] chan close $f contents $path(test2) -} " \x1b\$B\$O\x1b(B" +} " \x1B\$B\$O\x1B(B" test chan-io-1.9 {Tcl_WriteChars: WriteChars} { # When closing a channel with an encoding that appends escape bytes, check # for the case where the escape bytes overflow the current IO buffer. The @@ -243,7 +245,7 @@ test chan-io-3.3 {WriteChars: compatibility with WriteBytes: flush on line} -bod } -cleanup { chan close $f } -result "\r\n12" -test chan-io-3.4 {WriteChars: loop over stage buffer} { +test chan-io-3.4 {WriteChars: loop over stage buffer} -body { # stage buffer maps to more than can be queued at once. set f [open $path(test1) w] chan configure $f -encoding jis0208 -buffersize 16 @@ -251,8 +253,10 @@ test chan-io-3.4 {WriteChars: loop over stage buffer} { set x [list [contents $path(test1)]] chan close $f lappend x [contents $path(test1)] -} [list "!)!)!)!)!)!)!)!)" "!)!)!)!)!)!)!)!)!)!)!)!)!)!)!)"] -test chan-io-3.5 {WriteChars: saved != 0} { +} -cleanup { + catch {chan close $f} +} -result [list "!)!)!)!)!)!)!)!)" "!)!)!)!)!)!)!)!)!)!)!)!)!)!)!)"] +test chan-io-3.5 {WriteChars: saved != 0} -body { # Bytes produced by UtfToExternal from end of last channel buffer had to # be moved to beginning of next channel buffer to preserve requested # buffersize. @@ -262,24 +266,28 @@ test chan-io-3.5 {WriteChars: saved != 0} { set x [list [contents $path(test1)]] chan close $f lappend x [contents $path(test1)] -} [list "!)!)!)!)!)!)!)!)!" "!)!)!)!)!)!)!)!)!)!)!)!)!)!)!)"] -test chan-io-3.6 {WriteChars: (stageRead + dstWrote == 0)} { +} -cleanup { + catch {chan close $f} +} -result [list "!)!)!)!)!)!)!)!)!" "!)!)!)!)!)!)!)!)!)!)!)!)!)!)!)"] +test chan-io-3.6 {WriteChars: (stageRead + dstWrote == 0)} -body { # One incomplete UTF-8 character at end of staging buffer. Backup in src # to the beginning of that UTF-8 character and try again. # # Translate the first 16 bytes, produce 14 bytes of output, 2 left over - # (first two bytes of \uff21 in UTF-8). Given those two bytes try + # (first two bytes of \uFF21 in UTF-8). Given those two bytes try # translating them again, find that no bytes are read produced, and break # to outer loop where those two bytes will have the remaining 4 bytes (the - # last byte of \uff21 plus the all of \uff22) appended. + # last byte of \uFF21 plus the all of \uFF22) appended. set f [open $path(test1) w] chan configure $f -encoding shiftjis -buffersize 16 - chan puts -nonewline $f "12345678901234\uff21\uff22" + chan puts -nonewline $f 12345678901234\uFF21\uFF22 set x [list [contents $path(test1)]] chan close $f lappend x [contents $path(test1)] -} [list "12345678901234\x82\x60" "12345678901234\x82\x60\x82\x61"] -test chan-io-3.7 {WriteChars: (bufPtr->nextAdded > bufPtr->length)} { +} -cleanup { + catch {chan close $f} +} -result [list "12345678901234\x82\x60" "12345678901234\x82\x60\x82\x61"] +test chan-io-3.7 {WriteChars: (bufPtr->nextAdded > bufPtr->length)} -body { # When translating UTF-8 to external, the produced bytes went past end of # the channel buffer. This is done on purpose - we then truncate the bytes # at the end of the partial character to preserve the requested blocksize @@ -291,8 +299,10 @@ test chan-io-3.7 {WriteChars: (bufPtr->nextAdded > bufPtr->length)} { set x [list [contents $path(test1)]] chan close $f lappend x [contents $path(test1)] -} [list "!)!)!)!)!)!)!)!)!" "!)!)!)!)!)!)!)!)!)!)!)!)!)!)!)"] -test chan-io-3.8 {WriteChars: reset sawLF after each buffer} { +} -cleanup { + catch {chan close $f} +} -result [list "!)!)!)!)!)!)!)!)!" "!)!)!)!)!)!)!)!)!)!)!)!)!)!)!)"] +test chan-io-3.8 {WriteChars: reset sawLF after each buffer} -body { set f [open $path(test1) w] chan configure $f -encoding ascii -buffering line -translation lf \ -buffersize 16 @@ -300,7 +310,9 @@ test chan-io-3.8 {WriteChars: reset sawLF after each buffer} { set x [list [contents $path(test1)]] chan close $f lappend x [contents $path(test1)] -} [list "abcdefg\nhijklmno" "abcdefg\nhijklmnopqrstuvwxyz"] +} -cleanup { + catch {chan close $f} +} -result [list "abcdefg\nhijklmno" "abcdefg\nhijklmnopqrstuvwxyz"] test chan-io-4.1 {TranslateOutputEOL: lf} { # search for \n @@ -416,7 +428,7 @@ test chan-io-6.3 {Tcl_GetsObj: how many have we used?} -body { test chan-io-6.4 {Tcl_GetsObj: encoding == NULL} -body { set f [open $path(test1) w] chan configure $f -translation binary - chan puts $f "\x81\u1234\0" + chan puts $f "\x81\u1234\x00" chan close $f set f [open $path(test1)] chan configure $f -translation binary @@ -427,14 +439,14 @@ test chan-io-6.4 {Tcl_GetsObj: encoding == NULL} -body { test chan-io-6.5 {Tcl_GetsObj: encoding != NULL} -body { set f [open $path(test1) w] chan configure $f -translation binary - chan puts $f "\x88\xea\x92\x9a" + chan puts $f "\x88\xEA\x92\x9A" chan close $f set f [open $path(test1)] chan configure $f -encoding shiftjis list [chan gets $f line] $line } -cleanup { chan close $f -} -result [list 2 "\u4e00\u4e01"] +} -result [list 2 "\u4E00\u4E01"] set a "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb" append a $a append a $a @@ -462,20 +474,20 @@ test chan-io-6.7 {Tcl_GetsObj: error in input} -constraints stdio -body { } -result {-1} test chan-io-6.8 {Tcl_GetsObj: remember if EOF is seen} -body { set f [open $path(test1) w] - chan puts $f "abcdef\x1aghijk\nwombat" + chan puts $f "abcdef\x1Aghijk\nwombat" chan close $f set f [open $path(test1)] - chan configure $f -eofchar \x1a + chan configure $f -eofchar \x1A list [chan gets $f line] $line [chan gets $f line] $line } -cleanup { chan close $f } -result {6 abcdef -1 {}} test chan-io-6.9 {Tcl_GetsObj: remember if EOF is seen} -body { set f [open $path(test1) w] - chan puts $f "abcdefghijk\nwom\u001abat" + chan puts $f "abcdefghijk\nwom\u001Abat" chan close $f set f [open $path(test1)] - chan configure $f -eofchar \x1a + chan configure $f -eofchar \x1A list [chan gets $f line] $line [chan gets $f line] $line } -cleanup { chan close $f @@ -860,7 +872,7 @@ test chan-io-6.43 {Tcl_GetsObj: input saw cr} -setup { chan configure $f -blocking 0 lappend x [chan gets $f line] $line [testchannel queuedcr $f] chan configure $f -blocking 1 - chan puts -nonewline $f "\nabcd\refg\x1a" + chan puts -nonewline $f "\nabcd\refg\x1A" lappend x [chan gets $f line] $line [testchannel queuedcr $f] lappend x [chan gets $f line] $line } -cleanup { @@ -878,7 +890,7 @@ test chan-io-6.44 {Tcl_GetsObj: input saw cr, not followed by cr} -setup { chan configure $f -blocking 0 lappend x [chan gets $f line] $line [testchannel queuedcr $f] chan configure $f -blocking 1 - chan puts -nonewline $f "abcd\refg\x1a" + chan puts -nonewline $f "abcd\refg\x1A" lappend x [chan gets $f line] $line [testchannel queuedcr $f] lappend x [chan gets $f line] $line } -cleanup { @@ -914,7 +926,7 @@ test chan-io-6.46 {Tcl_GetsObj: input saw cr, followed by just \n should give eo chan configure $f -blocking 0 lappend x [chan gets $f line] $line [testchannel queuedcr $f] chan configure $f -blocking 1 - chan puts -nonewline $f "\n\x1a" + chan puts -nonewline $f "\n\x1A" lappend x [chan gets $f line] $line [testchannel queuedcr $f] } -cleanup { chan close $f @@ -980,10 +992,10 @@ test chan-io-6.52 {Tcl_GetsObj: saw EOF character} -constraints {testchannel} -b # if (eof != NULL) set f [open $path(test1) w] chan configure $f -translation lf - chan puts -nonewline $f "123456\x1ak9012345\r" + chan puts -nonewline $f "123456\x1Ak9012345\r" chan close $f set f [open $path(test1)] - chan configure $f -eofchar \x1a + chan configure $f -eofchar \x1A list [chan gets $f] [testchannel queuedcr $f] [chan tell $f] [chan gets $f] } -cleanup { chan close $f @@ -1011,14 +1023,14 @@ test chan-io-6.55 {Tcl_GetsObj: overconverted} -body { # Tcl_ExternalToUtf(), make sure state updated set f [open $path(test1) w] chan configure $f -encoding iso2022-jp - chan puts $f "there\u4e00ok\n\u4e01more bytes\nhere" + chan puts $f "there\u4E00ok\n\u4E01more bytes\nhere" chan close $f set f [open $path(test1)] chan configure $f -encoding iso2022-jp list [chan gets $f line] $line [chan gets $f line] $line [chan gets $f line] $line } -cleanup { chan close $f -} -result [list 8 "there\u4e00ok" 11 "\u4e01more bytes" 4 "here"] +} -result [list 8 "there\u4E00ok" 11 "\u4E01more bytes" 4 "here"] test chan-io-6.56 {Tcl_GetsObj: incomplete lines should disable file events} -setup { update variable x {} @@ -1052,19 +1064,19 @@ test chan-io-7.1 {FilterInputBytes: split up character at end of buffer} -body { # (result == TCL_CONVERT_MULTIBYTE) set f [open $path(test1) w] chan configure $f -encoding shiftjis - chan puts $f "1234567890123\uff10\uff11\uff12\uff13\uff14\nend" + chan puts $f "1234567890123\uFF10\uFF11\uFF12\uFF13\uFF14\nend" chan close $f set f [open $path(test1)] chan configure $f -encoding shiftjis -buffersize 16 chan gets $f } -cleanup { chan close $f -} -result "1234567890123\uff10\uff11\uff12\uff13\uff14" +} -result "1234567890123\uFF10\uFF11\uFF12\uFF13\uFF14" test chan-io-7.2 {FilterInputBytes: split up character in middle of buffer} -body { # (bufPtr->nextAdded < bufPtr->bufLength) set f [open $path(test1) w] chan configure $f -encoding binary - chan puts -nonewline $f "1234567890\n123\x82\x4f\x82\x50\x82" + chan puts -nonewline $f "1234567890\n123\x82\x4F\x82\x50\x82" chan close $f set f [open $path(test1)] chan configure $f -encoding shiftjis @@ -1077,7 +1089,7 @@ test chan-io-7.3 {FilterInputBytes: split up character at EOF} -setup { } -constraints {testchannel} -body { set f [open $path(test1) w] chan configure $f -encoding binary - chan puts -nonewline $f "1234567890123\x82\x4f\x82\x50\x82" + chan puts -nonewline $f "1234567890123\x82\x4F\x82\x50\x82" chan close $f set f [open $path(test1)] chan configure $f -encoding shiftjis @@ -1086,13 +1098,13 @@ test chan-io-7.3 {FilterInputBytes: split up character at EOF} -setup { lappend x [chan gets $f line] $line } -cleanup { chan close $f -} -result [list 15 "1234567890123\uff10\uff11" 18 0 1 -1 ""] +} -result [list 15 "1234567890123\uFF10\uFF11" 18 0 1 -1 ""] test chan-io-7.4 {FilterInputBytes: recover from split up character} -setup { variable x "" } -constraints {stdio fileevent} -body { set f [openpipe w+ $path(cat)] chan configure $f -encoding binary -buffering none - chan puts -nonewline $f "1234567890123\x82\x4f\x82\x50\x82" + chan puts -nonewline $f "1234567890123\x82\x4F\x82\x50\x82" chan configure $f -encoding shiftjis -blocking 0 chan event $f read [namespace code { lappend x [chan gets $f line] $line [chan blocked $f] @@ -1105,7 +1117,7 @@ test chan-io-7.4 {FilterInputBytes: recover from split up character} -setup { return $x } -cleanup { chan close $f -} -result [list -1 "" 1 17 "1234567890123\uff10\uff11\uff12\uff13" 0] +} -result [list -1 "" 1 17 "1234567890123\uFF10\uFF11\uFF12\uFF13" 0] test chan-io-8.1 {PeekAhead: only go to device if no more cached data} -constraints {testchannel} -body { # (bufPtr->nextPtr == NULL) @@ -1200,7 +1212,7 @@ test chan-io-8.7 {PeekAhead: cleanup} -setup { chan puts -nonewline $f "abcdefghijklmno\r" # here lappend x [chan gets $f line] $line [testchannel queuedcr $f] - chan puts -nonewline $f "\x1a" + chan puts -nonewline $f \x1A lappend x [chan gets $f line] $line } -cleanup { chan close $f @@ -1356,22 +1368,22 @@ test chan-io-12.4 {ReadChars: split-up char} -setup { chan configure $f -encoding shiftjis vwait [namespace which -variable x] chan configure $f -encoding binary -blocking 1 - chan puts -nonewline $f "\x7b" + chan puts -nonewline $f \x7B after 500 ;# Give the cat process time to catch up chan configure $f -encoding shiftjis -blocking 0 vwait [namespace which -variable x] return $x } -cleanup { chan close $f -} -result [list "123456789012345" 1 "\u672c" 0] +} -result [list "123456789012345" 1 \u672C 0] test chan-io-12.5 {ReadChars: chan events on partial characters} -setup { variable x {} } -constraints {stdio fileevent} -body { set path(test1) [makeFile { chan configure stdout -encoding binary -buffering none - chan gets stdin; chan puts -nonewline "\xe7" - chan gets stdin; chan puts -nonewline "\x89" - chan gets stdin; chan puts -nonewline "\xa6" + chan gets stdin; chan puts -nonewline \xE7 + chan gets stdin; chan puts -nonewline \x89 + chan gets stdin; chan puts -nonewline \xA6 } test1] set f [openpipe r+ $path(test1)] chan event $f readable [namespace code { @@ -1525,7 +1537,7 @@ test chan-io-13.10 {TranslateInputEOL: auto mode: \n} -body { chan close $f } -result "abcd\ndef" test chan-io-13.11 {TranslateInputEOL: EOF char} -body { - # (*chanPtr->inEofChar != '\0') + # (*chanPtr->inEofChar != '\x00') set f [open $path(test1) w] chan configure $f -translation lf chan puts -nonewline $f "abcd\ndefgh" @@ -1537,7 +1549,7 @@ test chan-io-13.11 {TranslateInputEOL: EOF char} -body { chan close $f } -result "abcd\nd" test chan-io-13.12 {TranslateInputEOL: find EOF char in src} -body { - # (*chanPtr->inEofChar != '\0') + # (*chanPtr->inEofChar != '\x00') set f [open $path(test1) w] chan configure $f -translation lf chan puts -nonewline $f "\r\n\r\n\r\nab\r\n\r\ndef\r\n\r\n\r\n" @@ -1873,7 +1885,7 @@ test chan-io-20.2 {Tcl_CreateChannel: initial settings} -constraints {win} -body list [chan configure $f -eofchar] [chan configure $f -translation] } -cleanup { chan close $f -} -result [list [list \x1a ""] {auto crlf}] +} -result [list [list \x1A ""] {auto crlf}] test chan-io-20.3 {Tcl_CreateChannel: initial settings} -constraints {unix} -body { set f [open $path(test1) w+] list [chan configure $f -eofchar] [chan configure $f -translation] @@ -3086,10 +3098,10 @@ test chan-io-30.16 {Tcl_Write ^Z at end, Tcl_Read auto} -setup { } -body { set f [open $path(test1) w] chan configure $f -translation lf - chan puts -nonewline $f hello\nthere\nand\rhere\n\x1a + chan puts -nonewline $f hello\nthere\nand\rhere\n\x1A chan close $f set f [open $path(test1) r] - chan configure $f -eofchar \x1a -translation auto + chan configure $f -translation auto -eofchar \x1A chan read $f } -cleanup { chan close $f @@ -3102,11 +3114,11 @@ test chan-io-30.17 {Tcl_Write, implicit ^Z at end, Tcl_Read auto} -setup { file delete $path(test1) } -constraints {win} -body { set f [open $path(test1) w] - chan configure $f -eofchar \x1a -translation lf + chan configure $f -translation lf -eofchar \x1A chan puts $f hello\nthere\nand\rhere chan close $f set f [open $path(test1) r] - chan configure $f -eofchar \x1a -translation auto + chan configure $f -translation auto -eofchar \x1A chan read $f } -cleanup { chan close $f @@ -3124,7 +3136,7 @@ test chan-io-30.18 {Tcl_Write, ^Z in middle, Tcl_Read auto} -setup { chan puts $f $s chan close $f set f [open $path(test1) r] - chan configure $f -eofchar \x1a -translation auto + chan configure $f -translation auto -eofchar \x1A set l "" lappend l [chan gets $f] lappend l [chan gets $f] @@ -3145,7 +3157,7 @@ test chan-io-30.19 {Tcl_Write, ^Z no newline in middle, Tcl_Read auto} -setup { chan puts $f $s chan close $f set f [open $path(test1) r] - chan configure $f -eofchar \x1a -translation auto + chan configure $f -translation auto -eofchar \x1A set l "" lappend l [chan gets $f] lappend l [chan gets $f] @@ -3178,7 +3190,7 @@ test chan-io-30.20 {Tcl_Write, ^Z in middle ignored, Tcl_Read lf} -setup { lappend l [chan eof $f] } -cleanup { chan close $f -} -result "abc def 0 \x1aghi 0 qrs 0 {} 1" +} -result "abc def 0 \x1Aghi 0 qrs 0 {} 1" test chan-io-30.21 {Tcl_Write, ^Z in middle ignored, Tcl_Read cr} -setup { file delete $path(test1) set l "" @@ -3190,7 +3202,7 @@ test chan-io-30.21 {Tcl_Write, ^Z in middle ignored, Tcl_Read cr} -setup { set f [open $path(test1) r] chan configure $f -translation cr -eofchar {} set x [chan gets $f] - lappend l [string equal $x "abc\ndef\n\x1aghi\nqrs\n"] + lappend l [string equal $x "abc\ndef\n\x1Aghi\nqrs\n"] lappend l [chan eof $f] lappend l [chan gets $f] lappend l [chan eof $f] @@ -3208,7 +3220,7 @@ test chan-io-30.22 {Tcl_Write, ^Z in middle ignored, Tcl_Read crlf} -setup { set f [open $path(test1) r] chan configure $f -translation crlf -eofchar {} set x [chan gets $f] - lappend l [string equal $x "abc\ndef\n\x1aghi\nqrs\n"] + lappend l [string equal $x "abc\ndef\n\x1Aghi\nqrs\n"] lappend l [chan eof $f] lappend l [chan gets $f] lappend l [chan eof $f] @@ -3223,7 +3235,7 @@ test chan-io-30.23 {Tcl_Write lf, ^Z in middle, Tcl_Read auto} -setup { chan puts $f [format abc\ndef\n%cqrs\ntuv 26] chan close $f set f [open $path(test1) r] - chan configure $f -translation auto -eofchar \x1a + chan configure $f -translation auto -eofchar \x1A list [string length [chan read $f]] [chan eof $f] } -cleanup { chan close $f @@ -3237,7 +3249,7 @@ test chan-io-30.24 {Tcl_Write lf, ^Z in middle, Tcl_Read lf} -setup { chan puts $f $c chan close $f set f [open $path(test1) r] - chan configure $f -translation lf -eofchar \x1a + chan configure $f -translation lf -eofchar \x1A list [string length [chan read $f]] [chan eof $f] } -cleanup { chan close $f @@ -3251,7 +3263,7 @@ test chan-io-30.25 {Tcl_Write cr, ^Z in middle, Tcl_Read auto} -setup { chan puts $f $c chan close $f set f [open $path(test1) r] - chan configure $f -translation auto -eofchar \x1a + chan configure $f -translation auto -eofchar \x1A list [string length [chan read $f]] [chan eof $f] } -cleanup { chan close $f @@ -3265,7 +3277,7 @@ test chan-io-30.26 {Tcl_Write cr, ^Z in middle, Tcl_Read cr} -setup { chan puts $f $c chan close $f set f [open $path(test1) r] - chan configure $f -translation cr -eofchar \x1a + chan configure $f -translation cr -eofchar \x1A list [string length [chan read $f]] [chan eof $f] } -cleanup { chan close $f @@ -3279,7 +3291,7 @@ test chan-io-30.27 {Tcl_Write crlf, ^Z in middle, Tcl_Read auto} -setup { chan puts $f $c chan close $f set f [open $path(test1) r] - chan configure $f -translation auto -eofchar \x1a + chan configure $f -translation auto -eofchar \x1A list [string length [chan read $f]] [chan eof $f] } -cleanup { chan close $f @@ -3293,7 +3305,7 @@ test chan-io-30.28 {Tcl_Write crlf, ^Z in middle, Tcl_Read crlf} -setup { chan puts $f $c chan close $f set f [open $path(test1) r] - chan configure $f -translation crlf -eofchar \x1a + chan configure $f -translation crlf -eofchar \x1A list [string length [chan read $f]] [chan eof $f] } -cleanup { chan close $f @@ -3644,7 +3656,7 @@ test chan-io-31.18 {Tcl_Write ^Z at end, Tcl_Gets auto} -setup { chan puts $f [format "hello\nthere\nand\rhere\n\%c" 26] chan close $f set f [open $path(test1) r] - chan configure $f -eofchar \x1a -translation auto + chan configure $f -translation auto -eofchar \x1A lappend l [chan gets $f] lappend l [chan gets $f] lappend l [chan gets $f] @@ -3660,11 +3672,11 @@ test chan-io-31.19 {Tcl_Write, implicit ^Z at end, Tcl_Gets auto} -setup { set l "" } -body { set f [open $path(test1) w] - chan configure $f -eofchar \x1a -translation lf + chan configure $f -translation lf -eofchar \x1A chan puts $f hello\nthere\nand\rhere chan close $f set f [open $path(test1) r] - chan configure $f -eofchar \x1a -translation auto + chan configure $f -translation auto -eofchar \x1A lappend l [chan gets $f] lappend l [chan gets $f] lappend l [chan gets $f] @@ -3684,8 +3696,7 @@ test chan-io-31.20 {Tcl_Write, ^Z in middle, Tcl_Gets auto, eofChar} -setup { chan puts $f [format "abc\ndef\n%cqrs\ntuv" 26] chan close $f set f [open $path(test1) r] - chan configure $f -eofchar \x1a - chan configure $f -translation auto + chan configure $f -translation auto -eofchar \x1A lappend l [chan gets $f] lappend l [chan gets $f] lappend l [chan eof $f] @@ -3703,7 +3714,7 @@ test chan-io-31.21 {Tcl_Write, no newline ^Z in middle, Tcl_Gets auto, eofChar} chan puts $f [format "abc\ndef\n%cqrs\ntuv" 26] chan close $f set f [open $path(test1) r] - chan configure $f -eofchar \x1a -translation auto + chan configure $f -translation auto -eofchar \x1A lappend l [chan gets $f] lappend l [chan gets $f] lappend l [chan eof $f] @@ -3733,7 +3744,7 @@ test chan-io-31.22 {Tcl_Write, ^Z in middle ignored, Tcl_Gets lf} -setup { lappend l [chan eof $f] } -cleanup { chan close $f -} -result "abc def 0 \x1aqrs 0 tuv 0 {} 1" +} -result "abc def 0 \x1Aqrs 0 tuv 0 {} 1" test chan-io-31.23 {Tcl_Write, ^Z in middle ignored, Tcl_Gets cr} -setup { file delete $path(test1) set l "" @@ -3755,7 +3766,7 @@ test chan-io-31.23 {Tcl_Write, ^Z in middle ignored, Tcl_Gets cr} -setup { lappend l [chan eof $f] } -cleanup { chan close $f -} -result "abc def 0 \x1aqrs 0 tuv 0 {} 1" +} -result "abc def 0 \x1Aqrs 0 tuv 0 {} 1" test chan-io-31.24 {Tcl_Write, ^Z in middle ignored, Tcl_Gets crlf} -setup { file delete $path(test1) set l "" @@ -3777,7 +3788,7 @@ test chan-io-31.24 {Tcl_Write, ^Z in middle ignored, Tcl_Gets crlf} -setup { lappend l [chan eof $f] } -cleanup { chan close $f -} -result "abc def 0 \x1aqrs 0 tuv 0 {} 1" +} -result "abc def 0 \x1Aqrs 0 tuv 0 {} 1" test chan-io-31.25 {Tcl_Write lf, ^Z in middle, Tcl_Gets auto} -setup { file delete $path(test1) set l "" @@ -3787,7 +3798,7 @@ test chan-io-31.25 {Tcl_Write lf, ^Z in middle, Tcl_Gets auto} -setup { chan puts $f [format "abc\ndef\n%cqrs\ntuv" 26] chan close $f set f [open $path(test1) r] - chan configure $f -translation auto -eofchar \x1a + chan configure $f -translation auto -eofchar \x1A lappend l [chan gets $f] lappend l [chan gets $f] lappend l [chan eof $f] @@ -3805,7 +3816,7 @@ test chan-io-31.26 {Tcl_Write lf, ^Z in middle, Tcl_Gets lf} -setup { chan puts $f [format "abc\ndef\n%cqrs\ntuv" 26] chan close $f set f [open $path(test1) r] - chan configure $f -translation lf -eofchar \x1a + chan configure $f -translation lf -eofchar \x1A lappend l [chan gets $f] lappend l [chan gets $f] lappend l [chan eof $f] @@ -3823,7 +3834,7 @@ test chan-io-31.27 {Tcl_Write cr, ^Z in middle, Tcl_Gets auto} -setup { chan puts $f [format "abc\ndef\n%cqrs\ntuv" 26] chan close $f set f [open $path(test1) r] - chan configure $f -translation auto -eofchar \x1a + chan configure $f -translation auto -eofchar \x1A lappend l [chan gets $f] lappend l [chan gets $f] lappend l [chan eof $f] @@ -3841,7 +3852,7 @@ test chan-io-31.28 {Tcl_Write cr, ^Z in middle, Tcl_Gets cr} -setup { chan puts $f [format "abc\ndef\n%cqrs\ntuv" 26] chan close $f set f [open $path(test1) r] - chan configure $f -translation cr -eofchar \x1a + chan configure $f -translation cr -eofchar \x1A lappend l [chan gets $f] lappend l [chan gets $f] lappend l [chan eof $f] @@ -3859,7 +3870,7 @@ test chan-io-31.29 {Tcl_Write crlf, ^Z in middle, Tcl_Gets auto} -setup { chan puts $f [format "abc\ndef\n%cqrs\ntuv" 26] chan close $f set f [open $path(test1) r] - chan configure $f -translation auto -eofchar \x1a + chan configure $f -translation auto -eofchar \x1A lappend l [chan gets $f] lappend l [chan gets $f] lappend l [chan eof $f] @@ -3877,7 +3888,7 @@ test chan-io-31.30 {Tcl_Write crlf, ^Z in middle, Tcl_Gets crlf} -setup { chan puts $f [format "abc\ndef\n%cqrs\ntuv" 26] chan close $f set f [open $path(test1) r] - chan configure $f -translation crlf -eofchar \x1a + chan configure $f -translation crlf -eofchar \x1A lappend l [chan gets $f] lappend l [chan gets $f] lappend l [chan eof $f] @@ -4633,12 +4644,12 @@ test chan-io-35.6 {Tcl_Eof, eof char, lf write, auto read} -setup { file delete $path(test1) } -body { set f [open $path(test1) w] - chan configure $f -translation lf -eofchar \x1a + chan configure $f -translation lf -eofchar \x1A chan puts $f abc\ndef chan close $f set s [file size $path(test1)] set f [open $path(test1) r] - chan configure $f -translation auto -eofchar \x1a + chan configure $f -translation auto -eofchar \x1A list $s [string length [chan read $f]] [chan eof $f] } -cleanup { chan close $f @@ -4647,12 +4658,12 @@ test chan-io-35.7 {Tcl_Eof, eof char, lf write, lf read} -setup { file delete $path(test1) } -body { set f [open $path(test1) w] - chan configure $f -translation lf -eofchar \x1a + chan configure $f -translation lf -eofchar \x1A chan puts $f abc\ndef chan close $f set s [file size $path(test1)] set f [open $path(test1) r] - chan configure $f -translation lf -eofchar \x1a + chan configure $f -translation lf -eofchar \x1A list $s [string length [chan read $f]] [chan eof $f] } -cleanup { chan close $f @@ -4661,12 +4672,12 @@ test chan-io-35.8 {Tcl_Eof, eof char, cr write, auto read} -setup { file delete $path(test1) } -body { set f [open $path(test1) w] - chan configure $f -translation cr -eofchar \x1a + chan configure $f -translation cr -eofchar \x1A chan puts $f abc\ndef chan close $f set s [file size $path(test1)] set f [open $path(test1) r] - chan configure $f -translation auto -eofchar \x1a + chan configure $f -translation auto -eofchar \x1A list $s [string length [chan read $f]] [chan eof $f] } -cleanup { chan close $f @@ -4675,12 +4686,12 @@ test chan-io-35.9 {Tcl_Eof, eof char, cr write, cr read} -setup { file delete $path(test1) } -body { set f [open $path(test1) w] - chan configure $f -translation cr -eofchar \x1a + chan configure $f -translation cr -eofchar \x1A chan puts $f abc\ndef chan close $f set s [file size $path(test1)] set f [open $path(test1) r] - chan configure $f -translation cr -eofchar \x1a + chan configure $f -translation cr -eofchar \x1A list $s [string length [chan read $f]] [chan eof $f] } -cleanup { chan close $f @@ -4689,12 +4700,12 @@ test chan-io-35.10 {Tcl_Eof, eof char, crlf write, auto read} -setup { file delete $path(test1) } -body { set f [open $path(test1) w] - chan configure $f -translation crlf -eofchar \x1a + chan configure $f -translation crlf -eofchar \x1A chan puts $f abc\ndef chan close $f set s [file size $path(test1)] set f [open $path(test1) r] - chan configure $f -translation auto -eofchar \x1a + chan configure $f -translation auto -eofchar \x1A list $s [string length [chan read $f]] [chan eof $f] } -cleanup { chan close $f @@ -4703,12 +4714,12 @@ test chan-io-35.11 {Tcl_Eof, eof char, crlf write, crlf read} -setup { file delete $path(test1) } -body { set f [open $path(test1) w] - chan configure $f -translation crlf -eofchar \x1a + chan configure $f -translation crlf -eofchar \x1A chan puts $f abc\ndef chan close $f set s [file size $path(test1)] set f [open $path(test1) r] - chan configure $f -translation crlf -eofchar \x1a + chan configure $f -translation crlf -eofchar \x1A list $s [string length [chan read $f]] [chan eof $f] } -cleanup { chan close $f @@ -4722,7 +4733,7 @@ test chan-io-35.12 {Tcl_Eof, eof char in middle, lf write, auto read} -setup { chan close $f set c [file size $path(test1)] set f [open $path(test1) r] - chan configure $f -translation auto -eofchar \x1a + chan configure $f -translation auto -eofchar \x1A list $c [string length [chan read $f]] [chan eof $f] } -cleanup { chan close $f @@ -4736,7 +4747,7 @@ test chan-io-35.13 {Tcl_Eof, eof char in middle, lf write, lf read} -setup { chan close $f set c [file size $path(test1)] set f [open $path(test1) r] - chan configure $f -translation lf -eofchar \x1a + chan configure $f -translation lf -eofchar \x1A list $c [string length [chan read $f]] [chan eof $f] } -cleanup { chan close $f @@ -4750,7 +4761,7 @@ test chan-io-35.14 {Tcl_Eof, eof char in middle, cr write, auto read} -setup { chan close $f set c [file size $path(test1)] set f [open $path(test1) r] - chan configure $f -translation auto -eofchar \x1a + chan configure $f -translation auto -eofchar \x1A list $c [string length [chan read $f]] [chan eof $f] } -cleanup { chan close $f @@ -4764,7 +4775,7 @@ test chan-io-35.15 {Tcl_Eof, eof char in middle, cr write, cr read} -setup { chan close $f set c [file size $path(test1)] set f [open $path(test1) r] - chan configure $f -translation cr -eofchar \x1a + chan configure $f -translation cr -eofchar \x1A list $c [string length [chan read $f]] [chan eof $f] } -cleanup { chan close $f @@ -4778,7 +4789,7 @@ test chan-io-35.16 {Tcl_Eof, eof char in middle, crlf write, auto read} -setup { chan close $f set c [file size $path(test1)] set f [open $path(test1) r] - chan configure $f -translation auto -eofchar \x1a + chan configure $f -translation auto -eofchar \x1A list $c [string length [chan read $f]] [chan eof $f] } -cleanup { chan close $f @@ -4792,7 +4803,7 @@ test chan-io-35.17 {Tcl_Eof, eof char in middle, crlf write, crlf read} -setup { chan close $f set c [file size $path(test1)] set f [open $path(test1) r] - chan configure $f -translation crlf -eofchar \x1a + chan configure $f -translation crlf -eofchar \x1A list $c [string length [chan read $f]] [chan eof $f] } -cleanup { chan close $f @@ -5162,7 +5173,7 @@ test chan-io-39.14 {Tcl_SetChannelOption: -encoding, binary & utf-8} -setup { } -body { set f [open $path(test1) w] chan configure $f -encoding {} - chan puts -nonewline $f \xe7\x89\xa6 + chan puts -nonewline $f \xE7\x89\xA6 chan close $f set f [open $path(test1) r] chan configure $f -encoding utf-8 @@ -5175,7 +5186,7 @@ test chan-io-39.15 {Tcl_SetChannelOption: -encoding, binary & utf-8} -setup { } -body { set f [open $path(test1) w] chan configure $f -encoding binary - chan puts -nonewline $f \xe7\x89\xa6 + chan puts -nonewline $f \xE7\x89\xA6 chan close $f set f [open $path(test1) r] chan configure $f -encoding utf-8 @@ -5196,7 +5207,7 @@ test chan-io-39.17 {Tcl_SetChannelOption: -encoding, clearing CHANNEL_NEED_MORE_ } -constraints {stdio fileevent} -body { set f [openpipe r+ $path(cat)] chan configure $f -encoding binary - chan puts -nonewline $f "\xe7" + chan puts -nonewline $f \xE7 chan flush $f chan configure $f -encoding utf-8 -blocking 0 chan event $f readable [namespace code { lappend x [chan read $f] }] @@ -5214,7 +5225,7 @@ test chan-io-39.17 {Tcl_SetChannelOption: -encoding, clearing CHANNEL_NEED_MORE_ return $x } -cleanup { chan close $f -} -result "{} timeout {} timeout \xe7 timeout" +} -result "{} timeout {} timeout \xE7 timeout" test chan-io-39.18 {Tcl_SetChannelOption, setting read mode independently} \ -constraints {socket} -body { proc accept {s a p} {chan close $s} @@ -5333,7 +5344,7 @@ test chan-io-40.1 {POSIX open access modes: RDWR} -setup { } -result {zzy abzzy} test chan-io-40.2 {POSIX open access modes: CREAT} -setup { file delete $path(test3) -} -constraints {unix} -body { +} -constraints {unix notWsl} -body { set f [open $path(test3) {WRONLY CREAT} 0o600] file stat $path(test3) stats set x [format 0o%03o [expr {$stats(mode) & 0o777}]] @@ -5346,11 +5357,11 @@ test chan-io-40.2 {POSIX open access modes: CREAT} -setup { } -result {0o600 {line 1}} test chan-io-40.3 {POSIX open access modes: CREAT} -setup { file delete $path(test3) -} -constraints {unix umask} -body { +} -constraints {unix umask notWsl} -body { # This test only works if your umask is 2, like ouster's. chan close [open $path(test3) {WRONLY CREAT}] file stat $path(test3) stats - format "0o%03o" [expr {$stats(mode) & 0o777}] + format 0o%03o [expr {$stats(mode) & 0o777}] } -result [format 0o%03o [expr {0o666 & ~ $umaskValue}]] test chan-io-40.4 {POSIX open access modes: CREAT} -setup { file delete $path(test3) @@ -5528,11 +5539,11 @@ test chan-io-42.2 {Tcl_FileeventCmd: replacing} {fileevent} { } {{first script} {new script} {yet another} {}} test chan-io-42.3 {Tcl_FileeventCmd: replacing, with NULL chars in script} {fileevent} { set result {} - chan event $f r "first scr\0ipt" + chan event $f r "first scr\x00ipt" lappend result [string length [chan event $f readable]] - chan event $f r "new scr\0ipt" + chan event $f r "new scr\x00ipt" lappend result [string length [chan event $f readable]] - chan event $f r "yet ano\0ther" + chan event $f r "yet ano\x00ther" lappend result [string length [chan event $f readable]] chan event $f r "" lappend result [chan event $f readable] @@ -5978,7 +5989,7 @@ test chan-io-48.4 {lf write, testing readability, ^Z termination, auto read mode chan puts -nonewline $f [format "abc\ndef\n%c" 26] chan close $f set f [open $path(test1) r] - chan configure $f -translation auto -eofchar \x1a + chan configure $f -translation auto -eofchar \x1A chan event $f readable [namespace code { if {[chan eof $f]} { set x done @@ -6002,7 +6013,7 @@ test chan-io-48.5 {lf write, testing readability, ^Z in middle, auto read mode} chan puts -nonewline $f [format "abc\ndef\n%cfoo\nbar\n" 26] chan close $f set f [open $path(test1) r] - chan configure $f -eofchar \x1a -translation auto + chan configure $f -translation auto -eofchar \x1A chan event $f readable [namespace code { if {[chan eof $f]} { set x done @@ -6026,7 +6037,7 @@ test chan-io-48.6 {cr write, testing readability, ^Z termination, auto read mode chan puts -nonewline $f [format "abc\ndef\n%c" 26] chan close $f set f [open $path(test1) r] - chan configure $f -translation auto -eofchar \x1a + chan configure $f -translation auto -eofchar \x1A chan event $f readable [namespace code { if {[chan eof $f]} { set x done @@ -6050,7 +6061,7 @@ test chan-io-48.7 {cr write, testing readability, ^Z in middle, auto read mode} chan puts -nonewline $f [format "abc\ndef\n%cfoo\nbar\n" 26] chan close $f set f [open $path(test1) r] - chan configure $f -eofchar \x1a -translation auto + chan configure $f -translation auto -eofchar \x1A chan event $f readable [namespace code { if {[chan eof $f]} { set x done @@ -6074,7 +6085,7 @@ test chan-io-48.8 {crlf write, testing readability, ^Z termination, auto read mo chan puts -nonewline $f [format "abc\ndef\n%c" 26] chan close $f set f [open $path(test1) r] - chan configure $f -translation auto -eofchar \x1a + chan configure $f -translation auto -eofchar \x1A chan event $f readable [namespace code { if {[chan eof $f]} { set x done @@ -6098,7 +6109,7 @@ test chan-io-48.9 {crlf write, testing readability, ^Z in middle, auto read mode chan puts -nonewline $f [format "abc\ndef\n%cfoo\nbar\n" 26] chan close $f set f [open $path(test1) r] - chan configure $f -eofchar \x1a -translation auto + chan configure $f -translation auto -eofchar \x1A chan event $f readable [namespace code { if {[chan eof $f]} { set x done @@ -6122,7 +6133,7 @@ test chan-io-48.10 {lf write, testing readability, ^Z in middle, lf read mode} - chan puts -nonewline $f [format "abc\ndef\n%cfoo\nbar\n" 26] chan close $f set f [open $path(test1) r] - chan configure $f -eofchar \x1a -translation lf + chan configure $f -translation lf -eofchar \x1A chan event $f readable [namespace code { if {[chan eof $f]} { set x done @@ -6146,7 +6157,7 @@ test chan-io-48.11 {lf write, testing readability, ^Z termination, lf read mode} chan puts -nonewline $f [format "abc\ndef\n%c" 26] chan close $f set f [open $path(test1) r] - chan configure $f -translation lf -eofchar \x1a + chan configure $f -translation lf -eofchar \x1A chan event $f readable [namespace code { if {[chan eof $f]} { set x done @@ -6170,7 +6181,7 @@ test chan-io-48.12 {cr write, testing readability, ^Z in middle, cr read mode} - chan puts -nonewline $f [format "abc\ndef\n%cfoo\nbar\n" 26] chan close $f set f [open $path(test1) r] - chan configure $f -eofchar \x1a -translation cr + chan configure $f -translation cr -eofchar \x1A chan event $f readable [namespace code { if {[chan eof $f]} { set x done @@ -6194,7 +6205,7 @@ test chan-io-48.13 {cr write, testing readability, ^Z termination, cr read mode} chan puts -nonewline $f [format "abc\ndef\n%c" 26] chan close $f set f [open $path(test1) r] - chan configure $f -translation cr -eofchar \x1a + chan configure $f -translation cr -eofchar \x1A chan event $f readable [namespace code { if {[chan eof $f]} { set x done @@ -6218,7 +6229,7 @@ test chan-io-48.14 {crlf write, testing readability, ^Z in middle, crlf read mod chan puts -nonewline $f [format "abc\ndef\n%cfoo\nbar\n" 26] chan close $f set f [open $path(test1) r] - chan configure $f -eofchar \x1a -translation crlf + chan configure $f -translation crlf -eofchar \x1A chan event $f readable [namespace code { if {[chan eof $f]} { set x done @@ -6242,7 +6253,7 @@ test chan-io-48.15 {crlf write, testing readability, ^Z termi, crlf read mode} - chan puts -nonewline $f [format "abc\ndef\n%c" 26] chan close $f set f [open $path(test1) r] - chan configure $f -translation crlf -eofchar \x1a + chan configure $f -translation crlf -eofchar \x1A chan event $f readable [namespace code { if {[chan eof $f]} { set x done @@ -6636,8 +6647,8 @@ test chan-io-52.3 {TclCopyChannel} -constraints {fcopy} -setup { } -body { set f1 [open $thisScript] set f2 [open $path(test1) w] - chan configure $f1 -translation lf -blocking 0 - chan configure $f2 -translation cr -blocking 0 + chan configure $f1 -translation lf -encoding iso8859-1 -blocking 0 + chan configure $f2 -translation cr -encoding iso8859-1 -blocking 0 set s0 [chan copy $f1 $f2] set result [list [chan configure $f1 -blocking] [chan configure $f2 -blocking]] chan close $f1 @@ -6667,8 +6678,8 @@ test chan-io-52.5 {TclCopyChannel, all} -constraints {fcopy} -setup { } -body { set f1 [open $thisScript] set f2 [open $path(test1) w] - chan configure $f1 -translation lf -blocking 0 - chan configure $f2 -translation lf -blocking 0 + chan configure $f1 -translation lf -encoding iso8859-1 -blocking 0 + chan configure $f2 -translation lf -encoding iso8859-1 -blocking 0 chan copy $f1 $f2 -size -1 ;# -1 means 'copy all', same as if no -size specified. set result [list [chan configure $f1 -blocking] [chan configure $f2 -blocking]] chan close $f1 @@ -6683,8 +6694,8 @@ test chan-io-52.5a {TclCopyChannel, all, other negative value} -setup { } -constraints {fcopy} -body { set f1 [open $thisScript] set f2 [open $path(test1) w] - chan configure $f1 -translation lf -blocking 0 - chan configure $f2 -translation lf -blocking 0 + chan configure $f1 -translation lf -encoding iso8859-1 -blocking 0 + chan configure $f2 -translation lf -encoding iso8859-1 -blocking 0 chan copy $f1 $f2 -size -2 ;# < 0 behaves like -1, copy all set result [list [chan configure $f1 -blocking] [chan configure $f2 -blocking]] chan close $f1 @@ -6699,8 +6710,8 @@ test chan-io-52.5b {TclCopyChannel, all, wrap to negative value} -setup { } -constraints {fcopy} -body { set f1 [open $thisScript] set f2 [open $path(test1) w] - chan configure $f1 -translation lf -blocking 0 - chan configure $f2 -translation lf -blocking 0 + chan configure $f1 -translation lf -encoding iso8859-1 -blocking 0 + chan configure $f2 -translation lf -encoding iso8859-1 -blocking 0 chan copy $f1 $f2 -size 3221176172 ;# Wrapped to < 0, behaves like -1, copy all set result [list [chan configure $f1 -blocking] [chan configure $f2 -blocking]] chan close $f1 @@ -6715,8 +6726,8 @@ test chan-io-52.6 {TclCopyChannel} -setup { } -constraints {fcopy} -body { set f1 [open $thisScript] set f2 [open $path(test1) w] - chan configure $f1 -translation lf -blocking 0 - chan configure $f2 -translation lf -blocking 0 + chan configure $f1 -translation lf -encoding iso8859-1 -blocking 0 + chan configure $f2 -translation lf -encoding iso8859-1 -blocking 0 set s0 [chan copy $f1 $f2 -size [expr {[file size $thisScript] + 5}]] set result [list [chan configure $f1 -blocking] [chan configure $f2 -blocking]] chan close $f1 @@ -6733,8 +6744,8 @@ test chan-io-52.7 {TclCopyChannel} -constraints {fcopy} -setup { } -body { set f1 [open $thisScript] set f2 [open $path(test1) w] - chan configure $f1 -translation lf -blocking 0 - chan configure $f2 -translation lf -blocking 0 + chan configure $f1 -translation lf -encoding iso8859-1 -blocking 0 + chan configure $f2 -translation lf -encoding iso8859-1 -blocking 0 chan copy $f1 $f2 set result [list [chan configure $f1 -blocking] [chan configure $f2 -blocking]] if {[file size $thisScript] == [file size $path(test1)]} { @@ -6779,7 +6790,7 @@ set path(utf8-rp.txt) [makeFile {} utf8-rp.txt] # Create kyrillic file, use lf translation to avoid os eol issues set out [open $path(kyrillic.txt) w] chan configure $out -encoding koi8-r -translation lf -chan puts $out "\u0410\u0410" +chan puts $out \u0410\u0410 chan close $out test chan-io-52.9 {TclCopyChannel & encodings} {fcopy} { # Copy kyrillic to UTF-8, using chan copy. @@ -6817,7 +6828,7 @@ test chan-io-52.10 {TclCopyChannel & encodings} {fcopy} { test chan-io-52.11 {TclCopyChannel & encodings} -setup { set f [open $path(utf8-fcopy.txt) w] fconfigure $f -encoding utf-8 -translation lf - puts $f "\u0410\u0410" + puts $f \u0410\u0410 close $f } -constraints {fcopy} -body { # binary to encoding => the input has to be in utf-8 to make sense to the @@ -6851,8 +6862,8 @@ test chan-io-53.2 {CopyData} -setup { } -constraints {fcopy} -body { set f1 [open $thisScript] set f2 [open $path(test1) w] - chan configure $f1 -translation lf -blocking 0 - chan configure $f2 -translation cr -blocking 0 + chan configure $f1 -translation lf -encoding iso8859-1 -blocking 0 + chan configure $f2 -translation cr -encoding iso8859-1 -blocking 0 chan copy $f1 $f2 -command [namespace code {set s0}] set result [list [chan configure $f1 -blocking] [chan configure $f2 -blocking]] variable s0 @@ -7491,7 +7502,7 @@ test chan-io-60.1 {writing illegal utf sequences} {fileevent testbytestring} { set out [open $path(script) w] chan puts $out "catch {load $::tcltestlib Tcltest}" chan puts $out { - chan puts [testbytestring \xe2] + chan puts [testbytestring \xE2] exit 1 } proc readit {pipe} { diff --git a/tests/cmdAH.test b/tests/cmdAH.test index bb3ad98..8d36594 100644 --- a/tests/cmdAH.test +++ b/tests/cmdAH.test @@ -4,8 +4,8 @@ # commands. Sourcing this file into Tcl runs the tests and generates output # for errors. No output means no errors were found. # -# Copyright (c) 1996-1998 by Sun Microsystems, Inc. -# Copyright (c) 1998-1999 by Scriptics Corporation. +# Copyright (c) 1996-1998 Sun Microsystems, Inc. +# Copyright (c) 1998-1999 Scriptics Corporation. # # See the file "license.terms" for information on usage and redistribution of # this file, and for a DISCLAIMER OF ALL WARRANTIES. @@ -30,6 +30,7 @@ testConstraint linkDirectory [expr { ($::tcl_platform(osVersion) >= 5.0 && [lindex [file system [temporaryDirectory]] 1] eq "NTFS") }] +testConstraint notWsl [expr {[llength [array names ::env *WSL*]] == 0}] global env set cmdAHwd [pwd] @@ -148,10 +149,10 @@ test cmdAH-2.6.2 {cd} -constraints {unix nonPortable} -setup { test cmdAH-2.6.3 {Tcl_CdObjCmd, bug #3118489} -setup { set dir [pwd] } -returnCodes error -body { - cd .\0 + cd .\x00 } -cleanup { cd $dir -} -match glob -result "couldn't change working directory to \".\0\": *" +} -match glob -result "couldn't change working directory to \".\x00\": *" test cmdAH-2.7 {Tcl_ConcatObjCmd} { concat } {} @@ -261,7 +262,7 @@ test cmdAH-6.3 {Tcl_FileObjCmd: volumes} -constraints unix -body { test cmdAH-6.4 {Tcl_FileObjCmd: volumes} -constraints win -body { set volumeList [string tolower [file volumes]] set element [lsearch -exact $volumeList "c:/"] - list [expr {$element>-1}] [glob -nocomplain [lindex $volumeList $element]*] + list [expr {$element>=0}] [glob -nocomplain [lindex $volumeList $element]*] } -match glob -result {1 *} # attributes @@ -849,7 +850,7 @@ test cmdAH-16.2 {Tcl_FileObjCmd: readable} { -result 1 } test cmdAH-16.3 {Tcl_FileObjCmd: readable} { - -constraints {unix notRoot testchmod} + -constraints {unix notRoot testchmod notWsl} -setup {testchmod 0o333 $gorpfile} -body {file readable $gorpfile} -result 0 @@ -882,7 +883,7 @@ set gorpfile [makeFile abcde gorp.file] test cmdAH-18.1 {Tcl_FileObjCmd: executable} -returnCodes error -body { file executable a b } -result {wrong # args: should be "file executable name"} -test cmdAH-18.2 {Tcl_FileObjCmd: executable} {notRoot} { +test cmdAH-18.2 {Tcl_FileObjCmd: executable} {notRoot notWsl} { file executable $gorpfile } 0 test cmdAH-18.3 {Tcl_FileObjCmd: executable} {unix testchmod} { @@ -1430,7 +1431,7 @@ test cmdAH-28.4 {Tcl_FileObjCmd: stat} -setup { file stat $gorpfile stat list $stat(nlink) $stat(size) $stat(type) } -result {1 12 file} -test cmdAH-28.5 {Tcl_FileObjCmd: stat} -constraints {unix} -setup { +test cmdAH-28.5 {Tcl_FileObjCmd: stat} -constraints {unix notWsl} -setup { unset -nocomplain stat } -body { file stat $gorpfile stat diff --git a/tests/fCmd.test b/tests/fCmd.test index ecb1d04..fcf5cbe 100644 --- a/tests/fCmd.test +++ b/tests/fCmd.test @@ -5,7 +5,7 @@ # for errors. No output means no errors were found. # # Copyright (c) 1996-1997 Sun Microsystems, Inc. -# Copyright (c) 1999 by Scriptics Corporation. +# Copyright (c) 1999 Scriptics Corporation. # # See the file "license.terms" for information on usage and redistribution of # this file, and for a DISCLAIMER OF ALL WARRANTIES. @@ -29,7 +29,7 @@ testConstraint winLessThan10 0 testConstraint notNetworkFilesystem 0 testConstraint reg 0 if {[testConstraint win]} { - catch { + if {[catch { # Is the registry extension already static to this shell? try { load {} Registry @@ -40,9 +40,14 @@ if {[testConstraint win]} { load $::reglib Registry } testConstraint reg 1 + } regError]} { + catch {package require registry; testConstraint reg 1} } } +# File permissions broken on wsl without some "exotic" wsl configuration +testConstraint notWsl [expr {[llength [array names ::env *WSL*]] == 0}] + set tmpspace /tmp;# default value # Find a group that exists on this Unix system, or else skip tests that # require Unix groups. @@ -281,7 +286,7 @@ test fCmd-3.14 {FileCopyRename: FileBasename fails} -setup { file mkdir td1 file rename ~_totally_bogus_user td1 } -result {user "_totally_bogus_user" doesn't exist} -test fCmd-3.15 {FileCopyRename: source[0] == '\0'} -setup { +test fCmd-3.15 {FileCopyRename: source[0] == '\x00'} -setup { cleanup } -constraints {notRoot unixOrWin} -returnCodes error -body { file mkdir td1 @@ -323,7 +328,7 @@ test fCmd-4.4 {TclFileMakeDirsCmd: Tcl_TranslateFileName fails} -setup { } -constraints {notRoot} -returnCodes error -body { file mkdir ~_totally_bogus_user } -result {user "_totally_bogus_user" doesn't exist} -test fCmd-4.5 {TclFileMakeDirsCmd: Tcl_SplitPath returns 0: *name == '\0'} -setup { +test fCmd-4.5 {TclFileMakeDirsCmd: Tcl_SplitPath returns 0: *name == '\x00'} -setup { cleanup } -constraints {notRoot} -returnCodes error -body { file mkdir "" @@ -364,7 +369,7 @@ test fCmd-4.10 {TclFileMakeDirsCmd: exists, is dir} -setup { } -result {1 1} test fCmd-4.11 {TclFileMakeDirsCmd: doesn't exist: errno != ENOENT} -setup { cleanup -} -constraints {unix notRoot testchmod} -returnCodes error -body { +} -constraints {unix notRoot testchmod notWsl} -returnCodes error -body { file mkdir td1/td2/td3 testchmod 0 td1/td2 file mkdir td1/td2/td3/td4 @@ -382,7 +387,7 @@ test fCmd-4.13 {TclFileMakeDirsCmd: doesn't exist: errno == ENOENT} -setup { test fCmd-4.14 {TclFileMakeDirsCmd: TclpCreateDirectory fails} -setup { cleanup file delete -force foo -} -constraints {unix notRoot} -body { +} -constraints {unix notRoot notWsl} -body { file mkdir foo file attr foo -perm 0o40000 file mkdir foo/tf1 @@ -394,7 +399,7 @@ test fCmd-4.16 {TclFileMakeDirsCmd: TclpCreateDirectory succeeds} -setup { } -constraints {notRoot} -body { file mkdir tf1 file exists tf1 -} -result {1} +} -result 1 test fCmd-5.1 {TclFileDeleteCmd: FileForceOption fails} -constraints {notRoot} -body { file delete -xyz @@ -507,7 +512,7 @@ test fCmd-6.5 {CopyRenameOneFile: lstat(target) != 0} -setup { } -result {tf2} test fCmd-6.6 {CopyRenameOneFile: errno != ENOENT} -setup { cleanup -} -constraints {unix notRoot testchmod} -body { +} -constraints {unix notRoot testchmod notWsl} -body { file mkdir td1 testchmod 0 td1 createfile tf1 @@ -626,7 +631,7 @@ test fCmd-6.22 {CopyRenameOneFile: copy/rename: !S_ISDIR(source)} -setup { } -result [file join $tmpspace tf1] test fCmd-6.23 {CopyRenameOneFile: TclpCopyDirectory failed} -setup { cleanup $tmpspace -} -constraints {xdev notRoot} -body { +} -constraints {xdev notRoot notWsl} -body { file mkdir td1/td2/td3 file attributes td1 -permissions 0o000 file rename td1 $tmpspace @@ -678,7 +683,7 @@ test fCmd-6.27 {CopyRenameOneFile: TclpCopyDirectory failed} -setup { } -match glob -result {error renaming "td1" to "/tmp/tcl*/td1": file already exists} test fCmd-6.28 {CopyRenameOneFile: TclpCopyDirectory failed} -setup { cleanup $tmpspace -} -constraints {notRoot xdev} -body { +} -constraints {notRoot xdev notWsl} -body { file mkdir td1/td2/td3 file attributes td1/td2/td3 -permissions 0o000 file rename td1 $tmpspace @@ -695,7 +700,7 @@ test fCmd-6.29 {CopyRenameOneFile: TclpCopyDirectory passed} -setup { } -result [file join $tmpspace td1 td2] test fCmd-6.30 {CopyRenameOneFile: TclpRemoveDirectory failed} -setup { cleanup $tmpspace -} -constraints {unix notRoot} -body { +} -constraints {unix notRoot notWsl} -body { file mkdir foo/bar file attr foo -perm 0o40555 file rename foo/bar $tmpspace @@ -770,7 +775,7 @@ test fCmd-8.3 {file copy and path translation: ensure correct error} -body { test fCmd-9.1 {file rename: comprehensive: EACCES} -setup { cleanup -} -constraints {unix notRoot} -body { +} -constraints {unix notRoot notWsl} -body { file mkdir td1 file mkdir td2 file attr td2 -perm 0o40000 @@ -807,7 +812,7 @@ test fCmd-9.4.a {file rename: comprehensive: dir to new name} -setup { } -result {{td3 td4} 1 0} test fCmd-9.4.b {file rename: comprehensive: dir to new name} -setup { cleanup -} -constraints {unix notRoot testchmod notDarwin9} -body { +} -constraints {unix notRoot testchmod notDarwin9 notWsl} -body { file mkdir td1 td2 testchmod 0o555 td2 file rename td1 td3 @@ -838,7 +843,7 @@ test fCmd-9.6.a {file rename: comprehensive: dir to self} -setup { } -result {{td1 td2} 1 0} test fCmd-9.6.b {file rename: comprehensive: dir to self} -setup { cleanup -} -constraints {unix notRoot testchmod} -body { +} -constraints {unix notRoot testchmod notWsl} -body { file mkdir td1 file mkdir td2 testchmod 0o555 td2 @@ -1046,7 +1051,7 @@ test fCmd-10.2 {file copy: comprehensive: file to new name} -setup { } -result {{tf1 tf2 tf3 tf4} tf1 tf2 1 0} test fCmd-10.3 {file copy: comprehensive: dir to new name} -setup { cleanup -} -constraints {unix notRoot testchmod} -body { +} -constraints {unix notRoot testchmod notWsl} -body { file mkdir [file join td1 tdx] file mkdir [file join td2 tdy] testchmod 0o555 td2 @@ -1133,7 +1138,7 @@ test fCmd-10.5 {file copy: comprehensive: dir to empty dir} -setup { } -result [subst {{td1 td2 tdd1 tdd2 tdd3 tdd4 tds1 tds2 tds3 tds4} {1 {error copying "td1" to "[file join td2 td1]": file already exists}} {1 {error copying "tds1" to "[file join tdd1 tds1]": file already exists}} 1 1 1}] test fCmd-10.6 {file copy: comprehensive: dir to non-empty dir} -setup { cleanup -} -constraints {notRoot unixOrWin testchmod} -body { +} -constraints {notRoot unixOrWin testchmod notWsl} -body { file mkdir tds1 file mkdir tds2 file mkdir [file join tdd1 tds1 xxx] @@ -1157,7 +1162,7 @@ test fCmd-10.7 {file rename: comprehensive: file to new name and dir} -setup { } -result [subst {{tf1 tf2} {[file join td1 tf3] [file join td1 tf4]} 1 0}] test fCmd-10.8 {file rename: comprehensive: dir to new name and dir} -setup { cleanup -} -constraints {unix notRoot testchmod} -body { +} -constraints {unix notRoot testchmod notWsl} -body { file mkdir td1 file mkdir td2 file mkdir td3 @@ -1249,7 +1254,7 @@ test fCmd-11.5 {TclFileRenameCmd: > 1 source & target is not a dir} -setup { catch {file rename tfa1 tfa2 tfa3} } -cleanup { file delete tfa1 tfa2 tfa3 -} -result {1} +} -result 1 test fCmd-11.6 {TclFileRenameCmd: : single file into directory} -setup { catch {file delete -force -- tfa1 tfad} } -constraints {notRoot} -body { @@ -1294,7 +1299,7 @@ test fCmd-12.1 {renamefile: source filename translation failing} -setup { catch {file rename ~/tfa1 tfa2} } -cleanup { set ::env(HOME) $temp -} -result {1} +} -result 1 test fCmd-12.2 {renamefile: src filename translation failing} -setup { set temp $::env(HOME) } -constraints {notRoot} -body { @@ -1306,7 +1311,7 @@ test fCmd-12.2 {renamefile: src filename translation failing} -setup { } -cleanup { set ::env(HOME) $temp file delete -force tfad -} -result {1} +} -result 1 test fCmd-12.3 {renamefile: stat failing on source} -setup { catch {file delete -force -- tfa1 tfa2} } -constraints {notRoot} -body { @@ -1351,10 +1356,10 @@ test fCmd-12.7 {renamefile: renaming directory into offspring} -setup { catch {file rename tfad tfad/dir} } -cleanup { file delete -force tfad -} -result {1} +} -result 1 test fCmd-12.8 {renamefile: generic error} -setup { catch {file delete -force -- tfa} -} -constraints {unix notRoot} -body { +} -constraints {unix notRoot notWsl} -body { file mkdir tfa file mkdir tfa/dir file attributes tfa -permissions 0o555 @@ -1362,7 +1367,7 @@ test fCmd-12.8 {renamefile: generic error} -setup { } -cleanup { catch {file attributes tfa -permissions 0o777} file delete -force tfa -} -result {1} +} -result 1 test fCmd-12.9 {renamefile: moving a file across volumes} -setup { cleanup $tmpspace } -constraints {unix notRoot} -body { @@ -1424,7 +1429,7 @@ test fCmd-13.5 {TclCopyFilesCmd: target filename translation failing} -setup { catch { file copy tfa ~/foobar } } -cleanup { set ::env(HOME) $temp -} -result {1} +} -result 1 test fCmd-13.6 {TclCopyFilesCmd: > 1 source & target is not a dir} -setup { catch {file delete -force -- tfa1 tfa2 tfa3} } -constraints {notRoot} -body { @@ -1434,7 +1439,7 @@ test fCmd-13.6 {TclCopyFilesCmd: > 1 source & target is not a dir} -setup { catch {file copy tfa1 tfa2 tfa3} } -cleanup { file delete tfa1 tfa2 tfa3 -} -result {1} +} -result 1 test fCmd-13.7 {TclCopyFilesCmd: single file into directory} -setup { catch {file delete -force -- tfa1 tfad} } -constraints {notRoot} -body { @@ -1480,7 +1485,7 @@ test fCmd-14.1 {copyfile: source filename translation failing} -setup { catch {file copy ~/tfa1 tfa2} } -cleanup { set ::env(HOME) $temp -} -result {1} +} -result 1 test fCmd-14.2 {copyfile: dst filename translation failing} -setup { set temp $::env(HOME) } -constraints {notRoot} -body { @@ -1541,14 +1546,14 @@ test fCmd-14.7 {copyfile: copy directory succeeding} -setup { } -result {1 1} test fCmd-14.8 {copyfile: copy directory failing} -setup { catch {file delete -force -- tfa} -} -constraints {unix notRoot} -body { +} -constraints {unix notRoot notWsl} -body { file mkdir tfa/dir/a/b/c file attributes tfa/dir -permissions 0o000 catch {file copy tfa tfa2} } -cleanup { file attributes tfa/dir -permissions 0o777 file delete -force tfa tfa2 -} -result {1} +} -result 1 # # Coverage tests for TclMkdirCmd() @@ -1561,7 +1566,7 @@ test fCmd-15.1 {TclMakeDirsCmd: target filename translation failing} -setup { catch {file mkdir ~/tfa} } -cleanup { set ::env(HOME) $temp -} -result {1} +} -result 1 # # Can Tcl_SplitPath return argc == 0? If so them we need a test for that code. # @@ -1572,7 +1577,7 @@ test fCmd-15.2 {TclMakeDirsCmd - one directory} -setup { file isdirectory tfa } -cleanup { file delete tfa -} -result {1} +} -result 1 test fCmd-15.3 {TclMakeDirsCmd: - two directories} -setup { catch {file delete -force -- tfa1 tfa2} } -constraints {notRoot} -body { @@ -1591,7 +1596,7 @@ test fCmd-15.4 {TclMakeDirsCmd - stat failing} -setup { } -cleanup { file attributes tfa -permissions 0o777 file delete -force tfa -} -result {1} +} -result 1 test fCmd-15.5 {TclMakeDirsCmd: - making a directory several levels deep} -setup { catch {file delete -force -- tfa} } -constraints {notRoot} -body { @@ -1599,7 +1604,7 @@ test fCmd-15.5 {TclMakeDirsCmd: - making a directory several levels deep} -setup file isdir tfa/a/b/c } -cleanup { file delete -force tfa -} -result {1} +} -result 1 test fCmd-15.6 {TclMakeDirsCmd: - trying to overwrite a file} -setup { catch {file delete -force -- tfa} } -constraints {notRoot} -body { @@ -1623,7 +1628,7 @@ test fCmd-15.8 {TclFileMakeDirsCmd: trying to create an existing dir} -body { file isdir tfa } -constraints {notRoot} -cleanup { file delete tfa -} -result {1} +} -result 1 # Coverage tests for TclDeleteFilesCommand() test fCmd-16.1 {test the -- argument} -constraints {notRoot} -setup { @@ -1647,7 +1652,7 @@ test fCmd-16.3 {test bad option} -constraints {notRoot} -setup { catch {file delete -dog tfa} } -cleanup { file delete tfa -} -result {1} +} -result 1 test fCmd-16.4 {accept zero files (TIP 323)} -body { file delete } -result {} @@ -1662,7 +1667,7 @@ test fCmd-16.6 {delete: source filename translation failing} -setup { catch {file delete ~/tfa} } -cleanup { set ::env(HOME) $temp -} -result {1} +} -result 1 test fCmd-16.7 {remove a non-empty directory without -force} -setup { catch {file delete -force -- tfa} } -constraints {notRoot} -body { @@ -1671,7 +1676,7 @@ test fCmd-16.7 {remove a non-empty directory without -force} -setup { catch {file delete tfa} } -cleanup { file delete -force tfa -} -result {1} +} -result 1 test fCmd-16.8 {remove a normal file} -constraints {notRoot} -setup { catch {file delete -force -- tfa} } -body { @@ -1680,10 +1685,10 @@ test fCmd-16.8 {remove a normal file} -constraints {notRoot} -setup { catch {file delete tfa} } -cleanup { file delete -force tfa -} -result {1} +} -result 1 test fCmd-16.9 {error while deleting file} -setup { catch {file delete -force -- tfa} -} -constraints {unix notRoot} -body { +} -constraints {unix notRoot notWsl} -body { file mkdir tfa createfile tfa/a file attributes tfa -permissions 0o555 @@ -1696,7 +1701,7 @@ test fCmd-16.9 {error while deleting file} -setup { } -cleanup { file attributes tfa -permissions 0o777 file delete -force tfa -} -result {1} +} -result 1 test fCmd-16.10 {deleting multiple files} -constraints {notRoot} -setup { catch {file delete -force -- tfa1 tfa2} } -body { @@ -1714,14 +1719,14 @@ test fCmd-16.11 {TclFileDeleteCmd: removing a nonexistant file} -setup { # More coverage tests for mkpath() test fCmd-17.1 {mkdir stat failing on target but not ENOENT} -setup { catch {file delete -force -- tfa1} -} -constraints {unix notRoot} -body { +} -constraints {unix notRoot notWsl} -body { file mkdir tfa1 file attributes tfa1 -permissions 0o555 catch {file mkdir tfa1/tfa2} } -cleanup { file attributes tfa1 -permissions 0o777 file delete -force tfa1 -} -result {1} +} -result 1 test fCmd-17.2 {mkdir several levels deep - relative} -setup { catch {file delete -force -- tfa} } -constraints {notRoot} -body { @@ -1738,7 +1743,7 @@ test fCmd-17.3 {mkdir several levels deep - absolute} -setup { file isdir $f } -cleanup { file delete $f [file join [pwd] tfa] -} -result {1} +} -result 1 # # Functionality tests for TclFileRenameCmd() @@ -1899,7 +1904,7 @@ test fCmd-18.15 {TclFileRenameCmd : rename a file to a symlink dir} -setup { checkcontent tfa1/tfa2 $s } -cleanup { file delete -force tfa1 tfalink -} -result {1} +} -result 1 test fCmd-18.16 {TclFileRenameCmd: rename a dangling symlink} -setup { catch {file delete -force -- tfa1 tfalink} } -constraints {unix notRoot} -body { @@ -1924,7 +1929,7 @@ test fCmd-19.1 {remove empty directory} -constraints {notRoot} -setup { } -result {0} test fCmd-19.2 {rmdir error besides EEXIST} -setup { catch {file delete -force -- tfa} -} -constraints {unix notRoot} -body { +} -constraints {unix notRoot notWsl} -body { file mkdir tfa file mkdir tfa/a file attributes tfa -permissions 0o555 @@ -1932,7 +1937,7 @@ test fCmd-19.2 {rmdir error besides EEXIST} -setup { } -cleanup { file attributes tfa -permissions 0o777 file delete -force tfa -} -result {1} +} -result 1 test fCmd-19.3 {recursive remove} -constraints {notRoot} -setup { catch {file delete -force -- tfa} } -body { @@ -1952,7 +1957,7 @@ test fCmd-19.3 {recursive remove} -constraints {notRoot} -setup { # test fCmd-20.1 {TraverseUnixTree : failure opening a subdirectory directory} -setup { catch {file delete -force -- tfa} -} -constraints {unix notRoot} -body { +} -constraints {unix notRoot notWsl} -body { file mkdir tfa file mkdir tfa/a file attributes tfa/a -permissions 0o000 @@ -1960,7 +1965,7 @@ test fCmd-20.1 {TraverseUnixTree : failure opening a subdirectory directory} -se } -cleanup { file attributes tfa/a -permissions 0o777 file delete -force tfa -} -result {1} +} -result 1 test fCmd-20.2 {TraverseUnixTree : recursive delete of large directory: Bug 1034337} -setup { catch {file delete -force -- tfa} } -constraints {unix notRoot} -body { @@ -2013,7 +2018,7 @@ test fCmd-21.4 {copy : more than one source and target is not a directory} -setu catch {file copy tfa1 tfa2 tfa3} } -cleanup { file delete tfa1 tfa2 tfa3 -} -result {1} +} -result 1 test fCmd-21.5 {copy : multiple files into directory} -constraints {notRoot} -setup { catch {file delete -force -- tfa1 tfa2 tfad} } -body { @@ -2138,7 +2143,7 @@ test fCmd-22.2 {TclpRenameFile: attempt to overwrite itself} -setup { checkcontent tfa1 $s } -cleanup { file delete tfa1 -} -result {1} +} -result 1 test fCmd-22.3 {TclpRenameFile: rename dir to existing dir} -setup { catch {file delete -force -- d1 tfad} } -constraints {notRoot} -body { @@ -2598,7 +2603,7 @@ test fCmd-30.2 {file readable on 'NTUSER.DAT'} -constraints {win} -body { expr {[info exists env(USERPROFILE)] && [file exists $env(USERPROFILE)/NTUSER.DAT] && [file readable $env(USERPROFILE)/NTUSER.DAT]} -} -result {1} +} -result 1 # At least one CI environment (GitHub Actions) is set up with the page file in # an unusual location; skip the test if that is so. test fCmd-30.3 {file readable on 'pagefile.sys'} -constraints { diff --git a/tests/tcltest.test b/tests/tcltest.test index 9da14de..750a20d 100644 --- a/tests/tcltest.test +++ b/tests/tcltest.test @@ -2,8 +2,8 @@ # built-in commands. Sourcing this file into Tcl runs the tests and # generates output for errors. No output means no errors were found. # -# Copyright (c) 1998-1999 by Scriptics Corporation. -# Copyright (c) 2000 by Ajuba Solutions +# Copyright (c) 1998-1999 Scriptics Corporation. +# Copyright (c) 2000 Ajuba Solutions # All rights reserved. # Note that there are several places where the value of @@ -22,6 +22,9 @@ if {[catch {package require tcltest 2.1}]} { return } +# File permissions broken on wsl without some "exotic" wsl configuration +testConstraint notWsl [expr {[llength [array names ::env *WSL*]] == 0}] + namespace eval ::tcltest::test { namespace import ::tcltest::* @@ -306,7 +309,7 @@ test tcltest-5.3 {testConstraint - constraint empty (tcltest::safeFetch)} { #} test tcltest-5.5 {InitConstraints: list of built-in constraints} \ - -constraints {!singleTestInterp} \ + -constraints {!singleTestInterp notWsl} \ -setup {tcltest::InitConstraints} \ -body { lsort [array names ::tcltest::testConstraints] } \ -result [lsort { @@ -556,7 +559,7 @@ switch -- $::tcl_platform(platform) { } } test tcltest-8.3 {tcltest a.tcl -tmpdir notReadableDir} { - -constraints {unix notRoot} + -constraints {unix notRoot notWsl} -body { child msg $a -tmpdir $notReadableDir return $msg @@ -572,7 +575,7 @@ testConstraint notFAT [expr { }] # FAT/NTFS permissions are fairly hopeless; ignore this test if that FS is used test tcltest-8.4 {tcltest a.tcl -tmpdir notWriteableDir} { - -constraints {unixOrWin notRoot notFAT} + -constraints {unixOrWin notRoot notFAT notWsl} -body { child msg $a -tmpdir $notWriteableDir return $msg @@ -645,7 +648,7 @@ test tcltest-8.11 {tcltest a.tcl -testdir thisdirectoryisafile} { -result {*not a directory*} } test tcltest-8.12 {tcltest a.tcl -testdir notReadableDir} { - -constraints {unix notRoot} + -constraints {unix notRoot notWsl} -body { child msg $a -testdir $notReadableDir return $msg diff --git a/tests/unixFCmd.test b/tests/unixFCmd.test index 4b1687f..7389cc7 100644 --- a/tests/unixFCmd.test +++ b/tests/unixFCmd.test @@ -18,6 +18,8 @@ if {"::tcltest" ni [namespace children]} { catch [list package require -exact Tcltest [info patchlevel]] testConstraint testchmod [llength [info commands testchmod]] +# File permissions broken on wsl without some "exotic" wsl configuration +testConstraint notWsl [expr {[llength [array names ::env *WSL*]] == 0}] # These tests really need to be run from a writable directory, which # it is assumed [temporaryDirectory] is. @@ -94,7 +96,7 @@ if {[testConstraint unix] && [testConstraint notRoot]} { test unixFCmd-1.1 {TclpRenameFile: EACCES} -setup { cleanup -} -constraints {unix notRoot} -body { +} -constraints {unix notRoot notWsl} -body { file mkdir td1/td2/td3 file attributes td1/td2 -permissions 0o000 file rename td1/td2/td3 td2 @@ -135,7 +137,7 @@ test unixFCmd-1.6 {TclpRenameFile: ENOTDIR} {emptyTest unix notRoot} { } {} test unixFCmd-1.7 {TclpRenameFile: EXDEV} -setup { cleanup -} -constraints {unix notRoot} -body { +} -constraints {unix notRoot notWsl} -body { file mkdir foo/bar file attr foo -perm 0o40555 file rename foo/bar /tmp @@ -219,7 +221,7 @@ test unixFCmd-2.4 {TclpCopyFile: src is fifo} -setup { } -result {fifo fifo} test unixFCmd-2.5 {TclpCopyFile: copy attributes} -setup { cleanup -} -constraints {unix notRoot} -body { +} -constraints {unix notRoot notWsl} -body { close [open tf1 a] file attributes tf1 -permissions 0o472 file copy tf1 tf2 @@ -334,7 +336,7 @@ test unixFCmd-16.3 {SetOwnerAttribute - invalid owner} -setup { test unixFCmd-17.1 {SetPermissionsAttribute} -setup { catch {file delete -force -- foo.test} -} -constraints {unix notRoot} -body { +} -constraints {unix notRoot notWsl} -body { close [open foo.test w] list [file attributes foo.test -permissions 0o000] \ [format 0o%03o [file attributes foo.test -permissions]] @@ -366,7 +368,7 @@ test unixFCmd-17.4 {SetPermissionsAttribute} -setup { close [open foo.test w] set ::i 4 proc permcheck {testnum permList expected} { - test $testnum {SetPermissionsAttribute} {unix notRoot} { + test $testnum {SetPermissionsAttribute} {unix notRoot notWsl} { set result {} foreach permstr $permList { file attributes foo.test -permissions $permstr diff --git a/win/tclWinTest.c b/win/tclWinTest.c index 0b4c8f6..d70d217 100644 --- a/win/tclWinTest.c +++ b/win/tclWinTest.c @@ -31,21 +31,14 @@ * Forward declarations of functions defined later in this file: */ -static int TesteventloopCmd(ClientData dummy, Tcl_Interp* interp, - int objc, Tcl_Obj *const objv[]); -static int TestvolumetypeCmd(ClientData dummy, - Tcl_Interp *interp, int objc, - Tcl_Obj *const objv[]); -static int TestwinclockCmd(ClientData dummy, Tcl_Interp* interp, - int objc, Tcl_Obj *const objv[]); -static int TestwinsleepCmd(ClientData dummy, Tcl_Interp* interp, - int objc, Tcl_Obj *const objv[]); -static int TestSizeCmd(ClientData dummy, Tcl_Interp* interp, - int objc, Tcl_Obj *const objv[]); +static Tcl_ObjCmdProc TesteventloopCmd; +static Tcl_ObjCmdProc TestvolumetypeCmd; +static Tcl_ObjCmdProc TestwinclockCmd; +static Tcl_ObjCmdProc TestwinsleepCmd; +static Tcl_ObjCmdProc TestSizeCmd; static Tcl_ObjCmdProc TestExceptionCmd; static int TestplatformChmod(const char *nativePath, int pmode); -static int TestchmodCmd(ClientData dummy, Tcl_Interp* interp, - int objc, Tcl_Obj *const objv[]); +static Tcl_ObjCmdProc TestchmodCmd; /* *---------------------------------------------------------------------- @@ -111,6 +104,7 @@ TesteventloopCmd( static int *framePtr = NULL;/* Pointer to integer on stack frame of * innermost invocation of the "wait" * subcommand. */ + (void)clientData; if (objc < 2) { Tcl_WrongNumArgs(interp, 1, objv, "option ..."); @@ -300,6 +294,7 @@ TestwinsleepCmd( Tcl_Obj *const * objv) /* Parameter vector */ { int ms; + (void)clientData; if (objc != 2) { Tcl_WrongNumArgs(interp, 1, objv, "ms"); @@ -385,6 +380,7 @@ TestExceptionCmd( EXCEPTION_GUARD_PAGE, EXCEPTION_INVALID_HANDLE, CONTROL_C_EXIT }; int cmd; + (void)dummy; if (objc != 2) { Tcl_WrongNumArgs(interp, 0, objv, ""); @@ -411,7 +407,6 @@ TestExceptionCmd( /* SMASH! */ RaiseException(exceptions[cmd], EXCEPTION_NONCONTINUABLE, 0, NULL); - /* NOTREACHED */ return TCL_OK; } @@ -451,7 +446,6 @@ TestplatformChmod( DWORD attr, newAclSize; PACL newAcl = NULL; int res = 0; - SID_IDENTIFIER_AUTHORITY worldAuthority = SECURITY_WORLD_SID_AUTHORITY; HANDLE hToken = NULL; int i; @@ -483,7 +477,7 @@ TestplatformChmod( GetLastError() != ERROR_INSUFFICIENT_BUFFER) { goto done; } - pTokenUser = ckalloc(dw); + pTokenUser = (TOKEN_USER *)ckalloc(dw); if (!GetTokenInformation(hToken, TokenUser, pTokenUser, dw, &dw)) { goto done; } @@ -525,7 +519,7 @@ TestplatformChmod( GetLastError() != ERROR_INSUFFICIENT_BUFFER) { goto done; } - pTokenGroup = ckalloc(dw); + pTokenGroup = (TOKEN_PRIMARY_GROUP *)ckalloc(dw); if (!GetTokenInformation(hToken, TokenPrimaryGroup, pTokenGroup, dw, &dw)) { ckfree(pTokenGroup); goto done; @@ -592,7 +586,7 @@ TestplatformChmod( newAclSize += offsetof(ACCESS_ALLOWED_ACE, SidStart) + aceEntry[i].sidLen; } - newAcl = ckalloc(newAclSize); + newAcl = (PACL)ckalloc(newAclSize); if (!InitializeAcl(newAcl, newAclSize, ACL_REVISION)) { goto done; } @@ -668,6 +662,7 @@ TestchmodCmd( Tcl_Obj *const * objv) /* Parameter vector */ { int i, mode; + (void)dummy; if (objc < 2) { Tcl_WrongNumArgs(interp, 1, objv, "mode file ?file ...?"); -- cgit v0.12 From 38555b60a2647d88236a922f72741a3f4611ccd2 Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Thu, 9 Mar 2023 02:47:53 +0000 Subject: winFCmd-1.24 has different error code on Win 11 --- tests/winFCmd.test | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/winFCmd.test b/tests/winFCmd.test index b146253..83dfbf7 100644 --- a/tests/winFCmd.test +++ b/tests/winFCmd.test @@ -286,8 +286,9 @@ test winFCmd-1.23 {TclpRenameFile: move dir into self} -setup { test winFCmd-1.24 {TclpRenameFile: move a root dir} -setup { cleanup } -constraints {win testfile} -body { + # Error code depends on Windows version testfile mv / c:/ -} -returnCodes error -result EINVAL +} -returnCodes error -result {^(EINVAL|ENOENT)$} -match regexp test winFCmd-1.25 {TclpRenameFile: cross file systems} -setup { cleanup } -constraints {win cdrom testfile} -body { -- cgit v0.12 From a1fb5545852518890326ddcf62f18e05de2425e3 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Thu, 9 Mar 2023 09:39:59 +0000 Subject: Fix tests/tcltest.test testcases (missing "namespace import") --- tests/tcltest.test | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/tcltest.test b/tests/tcltest.test index 750a20d..075cdf6 100644 --- a/tests/tcltest.test +++ b/tests/tcltest.test @@ -17,9 +17,9 @@ # interfere with the [test] doing the testing. # -if {[catch {package require tcltest 2.1}]} { - puts stderr "Skipping tests in [info script]. tcltest 2.1 required." - return +if {"::tcltest" ni [namespace children]} { + package require tcltest 2.1 + namespace import -force ::tcltest::* } # File permissions broken on wsl without some "exotic" wsl configuration -- cgit v0.12 From 5b0be625362e6884c5276718ba911a3d292cf1c1 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Thu, 9 Mar 2023 10:08:02 +0000 Subject: Adapt 2 testcases (io-39.16/io-39.16a), showing that "-encoding" can be shortened to "-en", but not to "-e" (because there is -eofchar too) --- tests/io.test | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/tests/io.test b/tests/io.test index ca7bd0c..6d556da 100644 --- a/tests/io.test +++ b/tests/io.test @@ -5607,13 +5607,20 @@ test io-39.15 {Tcl_SetChannelOption: -encoding, binary & utf-8} { close $f set x } \u7266 -test io-39.16 {Tcl_SetChannelOption: -encoding, errors} { +test io-39.16 {Tcl_SetChannelOption: -encoding (shortened to "-en"), errors} -body { file delete $path(test1) set f [open $path(test1) w] - set result [list [catch {fconfigure $f -encoding foobar} msg] $msg] + fconfigure $f -en foobar +} -cleanup { close $f - set result -} {1 {unknown encoding "foobar"}} +} -returnCodes 1 -result {unknown encoding "foobar"} +test io-39.16a {Tcl_SetChannelOption: -encoding (invalid shortening to "-e"), errors} -body { + file delete $path(test1) + set f [open $path(test1) w] + fconfigure $f -e foobar +} -cleanup { + close $f +} -returnCodes 1 -result {bad option "-e": should be one of -blocking, -buffering, -buffersize, -encoding, -eofchar, or -translation} test io-39.17 {Tcl_SetChannelOption: -encoding, clearing CHANNEL_NEED_MORE_DATA} {stdio fileevent} { set f [open "|[list [interpreter] $path(cat)]" r+] fconfigure $f -encoding binary -- cgit v0.12 From 56f5c7751c0f9e4da9c1a40ee533ce392a43e4a2 Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Thu, 9 Mar 2023 10:47:12 +0000 Subject: Fix SetChannelOption parsing of -encoding* to match GetChannelOption --- generic/tclIO.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index 97ca8d0..4a6dbf4 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -8194,7 +8194,7 @@ Tcl_SetChannelOption( } Tcl_SetChannelBufferSize(chan, newBufferSize); return TCL_OK; - } else if (HaveOpt(2, "-encoding")) { + } else if (HaveOpt(8, "-encoding")) { Tcl_Encoding encoding; int profile; @@ -8230,6 +8230,15 @@ Tcl_SetChannelOption( ResetFlag(statePtr, CHANNEL_NEED_MORE_DATA|CHANNEL_ENCODING_ERROR); UpdateInterest(chanPtr); return TCL_OK; + } else if (HaveOpt(9, "-encodingprofile")) { + int profile; + if (TclEncodingProfileNameToId(interp, newValue, &profile) != TCL_OK) { + return TCL_ERROR; + } + TCL_ENCODING_PROFILE_SET(statePtr->inputEncodingFlags, profile); + TCL_ENCODING_PROFILE_SET(statePtr->outputEncodingFlags, profile); + ResetFlag(statePtr, CHANNEL_NEED_MORE_DATA|CHANNEL_ENCODING_ERROR); + return TCL_OK; } else if (HaveOpt(2, "-eofchar")) { if (!newValue[0] || (!(newValue[0] & 0x80) && !newValue[1])) { if (GotFlag(statePtr, TCL_READABLE)) { @@ -8285,15 +8294,6 @@ Tcl_SetChannelOption( ResetFlag(statePtr, CHANNEL_EOF|CHANNEL_STICKY_EOF|CHANNEL_BLOCKED); statePtr->inputEncodingFlags &= ~TCL_ENCODING_END; return TCL_OK; - } else if (HaveOpt(1, "-encodingprofile")) { - int profile; - if (TclEncodingProfileNameToId(interp, newValue, &profile) != TCL_OK) { - return TCL_ERROR; - } - TCL_ENCODING_PROFILE_SET(statePtr->inputEncodingFlags, profile); - TCL_ENCODING_PROFILE_SET(statePtr->outputEncodingFlags, profile); - ResetFlag(statePtr, CHANNEL_NEED_MORE_DATA|CHANNEL_ENCODING_ERROR); - return TCL_OK; } else if (HaveOpt(1, "-translation")) { const char *readMode, *writeMode; -- cgit v0.12 From 494b4c8127e703f7b20f85dbb342921e36a8b557 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Thu, 9 Mar 2023 20:55:43 +0000 Subject: Fix cmdAH-4.3.13.00D80000.solo.utf-32le.tcl8.a testcase from tip-656-tcl9 branch, when TCL_UTF_MAX=3 --- generic/tclEncoding.c | 39 ++++++++++++++++++++++++++++++++++++--- win/tclWinTest.c | 10 +++++----- 2 files changed, 41 insertions(+), 8 deletions(-) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 61e3236..fc3397a 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -2545,7 +2545,7 @@ Utf32ToUtfProc( const char *srcStart, *srcEnd; const char *dstEnd, *dstStart; int result, numChars, charLimit = INT_MAX; - int ch, bytesLeft = srcLen % 4; + int ch = 0, bytesLeft = srcLen % 4; flags |= PTR2INT(clientData); if (flags & TCL_ENCODING_CHAR_LIMIT) { @@ -2562,6 +2562,21 @@ Utf32ToUtfProc( srcLen -= bytesLeft; } +#if TCL_UTF_MAX < 4 + /* + * If last code point is a high surrogate, we cannot handle that yet, + * unless we are at the end. + */ + + if (!(flags & TCL_ENCODING_END) && (srcLen >= 4) && + ((src[srcLen - ((flags & TCL_ENCODING_LE)?3:2)] & 0xFC) == 0xD8) && + ((src[srcLen - ((flags & TCL_ENCODING_LE)?2:3)]) == 0) && + ((src[srcLen - ((flags & TCL_ENCODING_LE)?1:4)]) == 0)) { + result = TCL_CONVERT_MULTIBYTE; + srcLen-= 4; + } +#endif + srcStart = src; srcEnd = src + srcLen; @@ -2574,21 +2589,33 @@ Utf32ToUtfProc( break; } +#if TCL_UTF_MAX < 4 + int prev = ch; +#endif if (flags & TCL_ENCODING_LE) { ch = (src[3] & 0xFF) << 24 | (src[2] & 0xFF) << 16 | (src[1] & 0xFF) << 8 | (src[0] & 0xFF); } else { ch = (src[0] & 0xFF) << 24 | (src[1] & 0xFF) << 16 | (src[2] & 0xFF) << 8 | (src[3] & 0xFF); } - if ((unsigned)ch > 0x10FFFF) { +#if TCL_UTF_MAX < 4 + if (((prev & ~0x3FF) == 0xD800) && ((ch & ~0x3FF) != 0xDC00)) { + /* Bug [10c2c17c32]. If Hi surrogate not followed by Lo surrogate, finish 3-byte UTF-8 */ + dst += Tcl_UniCharToUtf(-1, dst); + } +#endif + if ((unsigned)ch > 0x10FFFF) { + ch = 0xFFFD; if (STOPONERROR) { result = TCL_CONVERT_SYNTAX; break; } - ch = 0xFFFD; } else if (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT) && ((ch & ~0x7FF) == 0xD800)) { if (STOPONERROR) { result = TCL_CONVERT_SYNTAX; +#if TCL_UTF_MAX < 4 + ch = 0; +#endif break; } } @@ -2606,6 +2633,12 @@ Utf32ToUtfProc( src += sizeof(unsigned int); } +#if TCL_UTF_MAX < 4 + if ((ch & ~0x3FF) == 0xD800) { + /* Bug [10c2c17c32]. If Hi surrogate, finish 3-byte UTF-8 */ + dst += Tcl_UniCharToUtf(-1, dst); + } +#endif if ((flags & TCL_ENCODING_END) && (result == TCL_CONVERT_MULTIBYTE)) { /* We have a single byte left-over at the end */ if (dst > dstEnd) { diff --git a/win/tclWinTest.c b/win/tclWinTest.c index c7abcdc..29bdfe4 100644 --- a/win/tclWinTest.c +++ b/win/tclWinTest.c @@ -398,7 +398,7 @@ TestplatformChmod( const char *nativePath, int pmode) { - /* + /* * Note FILE_DELETE_CHILD missing from dirWriteMask because we do * not want overriding of child's delete setting when testing */ @@ -406,7 +406,7 @@ TestplatformChmod( FILE_WRITE_ATTRIBUTES | FILE_WRITE_EA | FILE_ADD_FILE | FILE_ADD_SUBDIRECTORY | STANDARD_RIGHTS_WRITE | DELETE | SYNCHRONIZE; - static const DWORD dirReadMask = + static const DWORD dirReadMask = FILE_READ_ATTRIBUTES | FILE_READ_EA | FILE_LIST_DIRECTORY | STANDARD_RIGHTS_READ | SYNCHRONIZE; /* Note - default user privileges allow ignoring TRAVERSE setting */ @@ -416,7 +416,7 @@ TestplatformChmod( static const DWORD fileWriteMask = FILE_WRITE_ATTRIBUTES | FILE_WRITE_EA | FILE_WRITE_DATA | FILE_APPEND_DATA | STANDARD_RIGHTS_WRITE | DELETE | SYNCHRONIZE; - static const DWORD fileReadMask = + static const DWORD fileReadMask = FILE_READ_ATTRIBUTES | FILE_READ_EA | FILE_READ_DATA | STANDARD_RIGHTS_READ | SYNCHRONIZE; static const DWORD fileExecuteMask = @@ -450,7 +450,7 @@ TestplatformChmod( if (!OpenProcessToken(GetCurrentProcess(), TOKEN_QUERY, &hToken)) { goto done; } - + /* Get process SID */ if (!GetTokenInformation(hToken, TokenUser, NULL, 0, &dw) && GetLastError() != ERROR_INSUFFICIENT_BUFFER) { @@ -468,7 +468,7 @@ TestplatformChmod( Tcl_Free(aceEntry[nSids].pSid); /* Since we have not ++'ed nSids */ goto done; } - /* + /* * Always include DACL modify rights so we don't get locked out */ aceEntry[nSids].mask = READ_CONTROL | WRITE_DAC | WRITE_OWNER | SYNCHRONIZE | -- cgit v0.12 From e9b9864f5680ac7c8b219468d057238c4172f825 Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Fri, 10 Mar 2023 07:37:55 +0000 Subject: Fix parsing of fconfigure set -encoding* options --- generic/tclIO.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index dd05ee3..e96ac23 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -8142,7 +8142,7 @@ Tcl_SetChannelOption( } Tcl_SetChannelBufferSize(chan, newBufferSize); return TCL_OK; - } else if (HaveOpt(2, "-encoding")) { + } else if (HaveOpt(8, "-encoding")) { Tcl_Encoding encoding; int profile; @@ -8178,6 +8178,15 @@ Tcl_SetChannelOption( ResetFlag(statePtr, CHANNEL_NEED_MORE_DATA|CHANNEL_ENCODING_ERROR); UpdateInterest(chanPtr); return TCL_OK; + } else if (HaveOpt(9, "-encodingprofile")) { + int profile; + if (TclEncodingProfileNameToId(interp, newValue, &profile) != TCL_OK) { + return TCL_ERROR; + } + TCL_ENCODING_PROFILE_SET(statePtr->inputEncodingFlags, profile); + TCL_ENCODING_PROFILE_SET(statePtr->outputEncodingFlags, profile); + ResetFlag(statePtr, CHANNEL_NEED_MORE_DATA|CHANNEL_ENCODING_ERROR); + return TCL_OK; } else if (HaveOpt(2, "-eofchar")) { if (!newValue[0] || (!(newValue[0] & 0x80) && (!newValue[1] #ifndef TCL_NO_DEPRECATED @@ -8212,15 +8221,6 @@ Tcl_SetChannelOption( ResetFlag(statePtr, CHANNEL_EOF|CHANNEL_STICKY_EOF|CHANNEL_BLOCKED); statePtr->inputEncodingFlags &= ~TCL_ENCODING_END; return TCL_OK; - } else if (HaveOpt(1, "-encodingprofile")) { - int profile; - if (TclEncodingProfileNameToId(interp, newValue, &profile) != TCL_OK) { - return TCL_ERROR; - } - TCL_ENCODING_PROFILE_SET(statePtr->inputEncodingFlags, profile); - TCL_ENCODING_PROFILE_SET(statePtr->outputEncodingFlags, profile); - ResetFlag(statePtr, CHANNEL_NEED_MORE_DATA|CHANNEL_ENCODING_ERROR); - return TCL_OK; } else if (HaveOpt(1, "-translation")) { const char *readMode, *writeMode; -- cgit v0.12 From 93bf87ed859e04b2fc9b197239ad6838e761e85d Mon Sep 17 00:00:00 2001 From: dgp Date: Fri, 10 Mar 2023 13:32:47 +0000 Subject: Make test less fragile to changing set of options. --- tests/io.test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/io.test b/tests/io.test index 6d556da..181d028 100644 --- a/tests/io.test +++ b/tests/io.test @@ -5620,7 +5620,7 @@ test io-39.16a {Tcl_SetChannelOption: -encoding (invalid shortening to "-e"), er fconfigure $f -e foobar } -cleanup { close $f -} -returnCodes 1 -result {bad option "-e": should be one of -blocking, -buffering, -buffersize, -encoding, -eofchar, or -translation} +} -returnCodes 1 -match glob -result {bad option "-e": should be one of *} test io-39.17 {Tcl_SetChannelOption: -encoding, clearing CHANNEL_NEED_MORE_DATA} {stdio fileevent} { set f [open "|[list [interpreter] $path(cat)]" r+] fconfigure $f -encoding binary -- cgit v0.12 From 6f85588bab4bad23425a2fea4e953546b8fa7ca3 Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Sat, 11 Mar 2023 16:43:36 +0000 Subject: Add testencoding Tcl_ExternalToUtf/Tcl_UtfToExternal for raw testing of corresponding C functions --- generic/tclTest.c | 159 +++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 157 insertions(+), 2 deletions(-) diff --git a/generic/tclTest.c b/generic/tclTest.c index b3df8ec..a398797 100644 --- a/generic/tclTest.c +++ b/generic/tclTest.c @@ -2016,6 +2016,156 @@ static void SpecialFree( } /* + *------------------------------------------------------------------------ + * + * UtfTransformFn -- + * + * Implements a direct call into Tcl_UtfToExternal and Tcl_ExternalToUtf + * as otherwise there is no script level command that directly exercises + * these functions (i/o command cannot test all combinations) + * The arguments at the script level are roughly those of the above + * functions: + * encodingname srcbytes flags state dstlen ?srcreadvar? ?dstwrotevar? ?dstcharsvar? + * + * Results: + * TCL_OK or TCL_ERROR. This any errors running the test, NOT the + * result of Tcl_UtfToExternal or Tcl_ExternalToUtf. + * + * Side effects: + * The result in the interpreter is a list of the return code from the + * Tcl_UtfToExternal/Tcl_ExternalToUtf functions, the encoding state, and + * the encoded binary string. If any of the srcreadvar, dstwrotevar and + * dstcharsvar are specified and not empty, they are treated as names + * of variables where the *srcRead, *dstWrote and *dstChars output + * from the functions are stored. + *------------------------------------------------------------------------ + */ +typedef int +UtfTransformFn(Tcl_Interp *interp, Tcl_Encoding encoding, const char *src, Tcl_Size srcLen, int flags, Tcl_EncodingState *statePtr, + char *dst, Tcl_Size dstLen, int *srcReadPtr, int *dstWrotePtr, int *dstCharsPtr); +static int UtfExtWrapper( + Tcl_Interp *interp, UtfTransformFn *transformer, int objc, Tcl_Obj *const objv[]) +{ + Tcl_Encoding encoding; + int encStateValue; /* Assumes Tcl_EncodingState points to integer!!! */ + Tcl_EncodingState encState; + int flags; + Tcl_Size srcLen, bufLen; + const unsigned char *bytes; + unsigned char *bufPtr; + int srcRead, dstLen, dstWrote, dstChars; + Tcl_Obj *srcReadVar, *dstWroteVar, *dstCharsVar; + int result; + + if (objc < 7 || objc > 10) { + Tcl_WrongNumArgs(interp, + 2, + objv, + "encoding srcbytes flags state dstlen ?srcreadvar? ?dstwrotevar? ?dstcharsvar?"); + return TCL_ERROR; + } + if (Tcl_GetEncodingFromObj(interp, objv[2], &encoding) != TCL_OK) { + return TCL_ERROR; + } + if (Tcl_GetIntFromObj(interp, objv[4], &flags) != TCL_OK) { + return TCL_ERROR; + } + /* Assumes state is integer if not "" */ + if (Tcl_GetIntFromObj(interp, objv[5], &encStateValue) == TCL_OK) { + encState = (Tcl_EncodingState)&encStateValue; + } else if (Tcl_GetCharLength(objv[5]) == 0) { + encState = NULL; + } else { + return TCL_ERROR; + } + if (Tcl_GetIntFromObj(interp, objv[6], &dstLen) != TCL_OK) { + return TCL_ERROR; + } + srcReadVar = NULL; + dstWroteVar = NULL; + dstCharsVar = NULL; + if (objc > 7) { + /* Has caller requested srcRead? */ + if (Tcl_GetCharLength(objv[7])) { + srcReadVar = objv[7]; + } + if (objc > 8) { + /* Ditto for dstWrote */ + if (Tcl_GetCharLength(objv[8])) { + dstWroteVar = objv[8]; + } + if (objc > 9) { + if (Tcl_GetCharLength(objv[9])) { + dstCharsVar = objv[9]; + } + } + } + } + + bufLen = dstLen + 4; /* 4 -> overflow detection */ + bufPtr = ckalloc(bufLen); + memmove(bufPtr + dstLen, "\xAB\xCD\xEF\x00", 4); /* overflow detection */ + bytes = Tcl_GetByteArrayFromObj(objv[3], &srcLen); /* Last! to avoid shimmering */ + result = (*transformer)(interp, encoding, bytes, srcLen, flags, + &encState, bufPtr, dstLen, + srcReadVar ? &srcRead : NULL, + &dstWrote, + dstCharsVar ? &dstChars : NULL); + if (memcmp(bufPtr + bufLen - 4, "\xAB\xCD\xEF\x00", 4)) { + Tcl_SetResult(interp, + "Tcl_ExternalToUtf wrote past output buffer", + TCL_STATIC); + result = TCL_ERROR; + } else { + Tcl_Obj *resultObjs[3]; + switch (result) { + case TCL_OK: + resultObjs[0] = Tcl_NewStringObj("ok", -1); + break; + case TCL_CONVERT_MULTIBYTE: + resultObjs[0] = Tcl_NewStringObj("multibyte", -1); + break; + case TCL_CONVERT_SYNTAX: + resultObjs[0] = Tcl_NewStringObj("syntax", -1); + break; + case TCL_CONVERT_UNKNOWN: + resultObjs[0] = Tcl_NewStringObj("unknown", -1); + break; + case TCL_CONVERT_NOSPACE: + resultObjs[0] = Tcl_NewStringObj("nospace", -1); + break; + default: + resultObjs[0] = Tcl_NewIntObj(result); + break; + } + result = TCL_OK; + resultObjs[1] = + encState ? Tcl_NewIntObj(encStateValue) : Tcl_NewObj(); + resultObjs[2] = Tcl_NewByteArrayObj(bufPtr, dstWrote); + if (srcReadVar) { + if (Tcl_ObjSetVar2(interp, srcReadVar, NULL, Tcl_NewIntObj(srcRead), 0) == NULL) { + result = TCL_ERROR; + } + } + if (dstWroteVar) { + if (Tcl_ObjSetVar2(interp, dstWroteVar, NULL, Tcl_NewIntObj(dstWrote), 0) == NULL) { + result = TCL_ERROR; + } + } + if (dstCharsVar) { + if (Tcl_ObjSetVar2(interp, dstCharsVar, NULL, Tcl_NewIntObj(dstChars), 0) == NULL) { + result = TCL_ERROR; + } + } + Tcl_SetObjResult(interp, Tcl_NewListObj(3, resultObjs)); + } + + ckfree(bufPtr); + Tcl_FreeEncoding(encoding); /* Free returned reference */ + return result; +} + +/* *---------------------------------------------------------------------- * * TestencodingCmd -- @@ -2044,10 +2194,10 @@ TestencodingObjCmd( const char *string; TclEncoding *encodingPtr; static const char *const optionStrings[] = { - "create", "delete", "nullength", NULL + "create", "delete", "nullength", "Tcl_ExternalToUtf", "Tcl_UtfToExternal", NULL }; enum options { - ENC_CREATE, ENC_DELETE, ENC_NULLENGTH + ENC_CREATE, ENC_DELETE, ENC_NULLENGTH, ENC_EXTTOUTF, ENC_UTFTOEXT }; if (objc < 2) { @@ -2116,6 +2266,11 @@ TestencodingObjCmd( Tcl_SetObjResult(interp, Tcl_NewIntObj(Tcl_GetEncodingNulLength(encoding))); Tcl_FreeEncoding(encoding); + break; + case ENC_EXTTOUTF: + return UtfExtWrapper(interp,Tcl_ExternalToUtf,objc,objv); + case ENC_UTFTOEXT: + return UtfExtWrapper(interp,Tcl_UtfToExternal,objc,objv); } return TCL_OK; } -- cgit v0.12 From 5533596329b2aaf620858427a86c7e299cc10b66 Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Sat, 11 Mar 2023 16:47:42 +0000 Subject: Add testencoding Tcl_ExternalToUtf/Tcl_UtfToExternal for raw testing of corresponding C functions --- generic/tclTest.c | 159 +++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 157 insertions(+), 2 deletions(-) diff --git a/generic/tclTest.c b/generic/tclTest.c index 06d5064..92e7f7a 100644 --- a/generic/tclTest.c +++ b/generic/tclTest.c @@ -1980,6 +1980,156 @@ static void SpecialFree( } /* + *------------------------------------------------------------------------ + * + * UtfTransformFn -- + * + * Implements a direct call into Tcl_UtfToExternal and Tcl_ExternalToUtf + * as otherwise there is no script level command that directly exercises + * these functions (i/o command cannot test all combinations) + * The arguments at the script level are roughly those of the above + * functions: + * encodingname srcbytes flags state dstlen ?srcreadvar? ?dstwrotevar? ?dstcharsvar? + * + * Results: + * TCL_OK or TCL_ERROR. This any errors running the test, NOT the + * result of Tcl_UtfToExternal or Tcl_ExternalToUtf. + * + * Side effects: + * The result in the interpreter is a list of the return code from the + * Tcl_UtfToExternal/Tcl_ExternalToUtf functions, the encoding state, and + * the encoded binary string. If any of the srcreadvar, dstwrotevar and + * dstcharsvar are specified and not empty, they are treated as names + * of variables where the *srcRead, *dstWrote and *dstChars output + * from the functions are stored. + *------------------------------------------------------------------------ + */ +typedef int +UtfTransformFn(Tcl_Interp *interp, Tcl_Encoding encoding, const char *src, Tcl_Size srcLen, int flags, Tcl_EncodingState *statePtr, + char *dst, Tcl_Size dstLen, int *srcReadPtr, int *dstWrotePtr, int *dstCharsPtr); +static int UtfExtWrapper( + Tcl_Interp *interp, UtfTransformFn *transformer, int objc, Tcl_Obj *const objv[]) +{ + Tcl_Encoding encoding; + int encStateValue; /* Assumes Tcl_EncodingState points to integer!!! */ + Tcl_EncodingState encState; + int flags; + Tcl_Size srcLen, bufLen; + const unsigned char *bytes; + unsigned char *bufPtr; + int srcRead, dstLen, dstWrote, dstChars; + Tcl_Obj *srcReadVar, *dstWroteVar, *dstCharsVar; + int result; + + if (objc < 7 || objc > 10) { + Tcl_WrongNumArgs(interp, + 2, + objv, + "encoding srcbytes flags state dstlen ?srcreadvar? ?dstwrotevar? ?dstcharsvar?"); + return TCL_ERROR; + } + if (Tcl_GetEncodingFromObj(interp, objv[2], &encoding) != TCL_OK) { + return TCL_ERROR; + } + if (Tcl_GetIntFromObj(interp, objv[4], &flags) != TCL_OK) { + return TCL_ERROR; + } + /* Assumes state is integer if not "" */ + if (Tcl_GetIntFromObj(interp, objv[5], &encStateValue) == TCL_OK) { + encState = (Tcl_EncodingState)&encStateValue; + } else if (Tcl_GetCharLength(objv[5]) == 0) { + encState = NULL; + } else { + return TCL_ERROR; + } + if (Tcl_GetIntFromObj(interp, objv[6], &dstLen) != TCL_OK) { + return TCL_ERROR; + } + srcReadVar = NULL; + dstWroteVar = NULL; + dstCharsVar = NULL; + if (objc > 7) { + /* Has caller requested srcRead? */ + if (Tcl_GetCharLength(objv[7])) { + srcReadVar = objv[7]; + } + if (objc > 8) { + /* Ditto for dstWrote */ + if (Tcl_GetCharLength(objv[8])) { + dstWroteVar = objv[8]; + } + if (objc > 9) { + if (Tcl_GetCharLength(objv[9])) { + dstCharsVar = objv[9]; + } + } + } + } + + bufLen = dstLen + 4; /* 4 -> overflow detection */ + bufPtr = ckalloc(bufLen); + memmove(bufPtr + dstLen, "\xAB\xCD\xEF\x00", 4); /* overflow detection */ + bytes = Tcl_GetByteArrayFromObj(objv[3], &srcLen); /* Last! to avoid shimmering */ + result = (*transformer)(interp, encoding, bytes, srcLen, flags, + &encState, bufPtr, dstLen, + srcReadVar ? &srcRead : NULL, + &dstWrote, + dstCharsVar ? &dstChars : NULL); + if (memcmp(bufPtr + bufLen - 4, "\xAB\xCD\xEF\x00", 4)) { + Tcl_SetResult(interp, + "Tcl_ExternalToUtf wrote past output buffer", + TCL_STATIC); + result = TCL_ERROR; + } else { + Tcl_Obj *resultObjs[3]; + switch (result) { + case TCL_OK: + resultObjs[0] = Tcl_NewStringObj("ok", -1); + break; + case TCL_CONVERT_MULTIBYTE: + resultObjs[0] = Tcl_NewStringObj("multibyte", -1); + break; + case TCL_CONVERT_SYNTAX: + resultObjs[0] = Tcl_NewStringObj("syntax", -1); + break; + case TCL_CONVERT_UNKNOWN: + resultObjs[0] = Tcl_NewStringObj("unknown", -1); + break; + case TCL_CONVERT_NOSPACE: + resultObjs[0] = Tcl_NewStringObj("nospace", -1); + break; + default: + resultObjs[0] = Tcl_NewIntObj(result); + break; + } + result = TCL_OK; + resultObjs[1] = + encState ? Tcl_NewIntObj(encStateValue) : Tcl_NewObj(); + resultObjs[2] = Tcl_NewByteArrayObj(bufPtr, dstWrote); + if (srcReadVar) { + if (Tcl_ObjSetVar2(interp, srcReadVar, NULL, Tcl_NewIntObj(srcRead), 0) == NULL) { + result = TCL_ERROR; + } + } + if (dstWroteVar) { + if (Tcl_ObjSetVar2(interp, dstWroteVar, NULL, Tcl_NewIntObj(dstWrote), 0) == NULL) { + result = TCL_ERROR; + } + } + if (dstCharsVar) { + if (Tcl_ObjSetVar2(interp, dstCharsVar, NULL, Tcl_NewIntObj(dstChars), 0) == NULL) { + result = TCL_ERROR; + } + } + Tcl_SetObjResult(interp, Tcl_NewListObj(3, resultObjs)); + } + + ckfree(bufPtr); + Tcl_FreeEncoding(encoding); /* Free returned reference */ + return result; +} + +/* *---------------------------------------------------------------------- * * TestencodingCmd -- @@ -2008,10 +2158,10 @@ TestencodingObjCmd( const char *string; TclEncoding *encodingPtr; static const char *const optionStrings[] = { - "create", "delete", "nullength", NULL + "create", "delete", "nullength", "Tcl_ExternalToUtf", "Tcl_UtfToExternal", NULL }; enum options { - ENC_CREATE, ENC_DELETE, ENC_NULLENGTH + ENC_CREATE, ENC_DELETE, ENC_NULLENGTH, ENC_EXTTOUTF, ENC_UTFTOEXT } index; if (objc < 2) { @@ -2080,6 +2230,11 @@ TestencodingObjCmd( Tcl_SetObjResult(interp, Tcl_NewIntObj(Tcl_GetEncodingNulLength(encoding))); Tcl_FreeEncoding(encoding); + break; + case ENC_EXTTOUTF: + return UtfExtWrapper(interp,Tcl_ExternalToUtf,objc,objv); + case ENC_UTFTOEXT: + return UtfExtWrapper(interp,Tcl_UtfToExternal,objc,objv); } return TCL_OK; } -- cgit v0.12 From 1889ded1144a4dbd44d0c6f03e72a01d70115a51 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Sat, 11 Mar 2023 22:00:29 +0000 Subject: Proposed fix for [db7a085bd9]: encoding convertfrom -strict utf-16 accepts partial surrogates. TODO: testcases, and implement for 8.7 too --- generic/tclCmdAH.c | 2 +- generic/tclEncoding.c | 30 ++++++++++++++++++++++++++---- 2 files changed, 27 insertions(+), 5 deletions(-) diff --git a/generic/tclCmdAH.c b/generic/tclCmdAH.c index 4df1216..ac504d0 100644 --- a/generic/tclCmdAH.c +++ b/generic/tclCmdAH.c @@ -514,7 +514,7 @@ EncodingConvertfromObjCmd( char buf[TCL_INTEGER_SPACE]; sprintf(buf, "%" TCL_Z_MODIFIER "u", result); Tcl_SetObjResult(interp, Tcl_ObjPrintf("unexpected byte sequence starting at index %" - TCL_Z_MODIFIER "u: '\\x%X'", result, UCHAR(bytesPtr[result]))); + TCL_Z_MODIFIER "u: '\\x%02X'", result, UCHAR(bytesPtr[result]))); Tcl_SetErrorCode(interp, "TCL", "ENCODING", "ILLEGALSEQUENCE", buf, NULL); Tcl_DStringFree(&ds); diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index fc3397a..4f334bb 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -2603,6 +2603,7 @@ Utf32ToUtfProc( dst += Tcl_UniCharToUtf(-1, dst); } #endif + if ((unsigned)ch > 0x10FFFF) { ch = 0xFFFD; if (STOPONERROR) { @@ -2639,6 +2640,7 @@ Utf32ToUtfProc( dst += Tcl_UniCharToUtf(-1, dst); } #endif + if ((flags & TCL_ENCODING_END) && (result == TCL_CONVERT_MULTIBYTE)) { /* We have a single byte left-over at the end */ if (dst > dstEnd) { @@ -2846,6 +2848,13 @@ Utf16ToUtfProc( ch = (src[0] & 0xFF) << 8 | (src[1] & 0xFF); } if (((prev & ~0x3FF) == 0xD800) && ((ch & ~0x3FF) != 0xDC00)) { + if (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT)) { + result = TCL_CONVERT_UNKNOWN; + src -= 2; /* Go back to before the high surrogate */ + dst--; /* Also undo writing a single byte too much */ + numChars--; + break; + } /* Bug [10c2c17c32]. If Hi surrogate not followed by Lo surrogate, finish 3-byte UTF-8 */ dst += Tcl_UniCharToUtf(-1, dst); } @@ -2855,17 +2864,30 @@ Utf16ToUtfProc( * unsigned short-size data. */ - if (ch && ch < 0x80) { + if ((unsigned)ch - 1 < 0x7F) { *dst++ = (ch & 0xFF); - } else { + } else if (((prev & ~0x3FF) == 0xD800) || ((ch & ~0x3FF) == 0xD800)) { dst += Tcl_UniCharToUtf(ch | TCL_COMBINE, dst); + } else if (((ch & ~0x3FF) == 0xDC00) && ((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT)) { + /* Lo surrogate not preceded by Hi surrogate */ + result = TCL_CONVERT_UNKNOWN; + break; + } else { + dst += Tcl_UniCharToUtf(ch, dst); } src += sizeof(unsigned short); } if ((ch & ~0x3FF) == 0xD800) { - /* Bug [10c2c17c32]. If Hi surrogate, finish 3-byte UTF-8 */ - dst += Tcl_UniCharToUtf(-1, dst); + if ((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT) { + result = TCL_CONVERT_UNKNOWN; + src -= 2; + dst--; + numChars--; + } else { + /* Bug [10c2c17c32]. If Hi surrogate, finish 3-byte UTF-8 */ + dst += Tcl_UniCharToUtf(-1, dst); + } } if ((flags & TCL_ENCODING_END) && (result == TCL_CONVERT_MULTIBYTE)) { /* We have a single byte left-over at the end */ -- cgit v0.12 From 8c5fc11b5ac89e8e6fd57484c9221a5e70c3c145 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Sun, 12 Mar 2023 10:49:18 +0000 Subject: Always output 2 hex characters in "unexpected byte sequence" exception message. make testcases io-38.3/chan-io-38.3 independant from system encoding --- generic/tclCmdAH.c | 2 +- tests/chanio.test | 2 +- tests/io.test | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/generic/tclCmdAH.c b/generic/tclCmdAH.c index 4f743cc..c2424d6 100644 --- a/generic/tclCmdAH.c +++ b/generic/tclCmdAH.c @@ -656,7 +656,7 @@ EncodingConvertfromObjCmd( char buf[TCL_INTEGER_SPACE]; sprintf(buf, "%u", result); Tcl_SetObjResult(interp, Tcl_ObjPrintf("unexpected byte sequence starting at index %" - "u: '\\x%X'", result, UCHAR(bytesPtr[result]))); + "u: '\\x%02X'", result, UCHAR(bytesPtr[result]))); Tcl_SetErrorCode(interp, "TCL", "ENCODING", "ILLEGALSEQUENCE", buf, NULL); Tcl_DStringFree(&ds); diff --git a/tests/chanio.test b/tests/chanio.test index 2915fc5..6814224 100644 --- a/tests/chanio.test +++ b/tests/chanio.test @@ -4982,7 +4982,7 @@ test chan-io-38.2 {Tcl_SetChannelBufferSize, Tcl_GetChannelBufferSize} -setup { test chan-io-38.3 {Tcl_SetChannelBufferSize, changing buffersize between reads} { # This test crashes the interp if Bug #427196 is not fixed set chan [open [info script] r] - chan configure $chan -buffersize 10 + chan configure $chan -buffersize 10 -encoding utf-8 set var [chan read $chan 2] chan configure $chan -buffersize 32 append var [chan read $chan] diff --git a/tests/io.test b/tests/io.test index e762bba..3c0ec2e 100644 --- a/tests/io.test +++ b/tests/io.test @@ -5476,7 +5476,7 @@ test io-38.3 {Tcl_SetChannelBufferSize, changing buffersize between reads} { # This test crashes the interp if Bug #427196 is not fixed set chan [open [info script] r] - fconfigure $chan -buffersize 10 + fconfigure $chan -buffersize 10 -encoding utf-8 set var [read $chan 2] fconfigure $chan -buffersize 32 append var [read $chan] -- cgit v0.12 From b7f151f1268d4b49953da193f135d52e6e52f841 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Sun, 12 Mar 2023 11:24:24 +0000 Subject: Make test-output more readable when it contains non-printable characters (stolen from TIP #656 impl, thanks Ashok!) tcltest -> 2.5.6 --- library/manifest.txt | 2 +- library/tcltest/pkgIndex.tcl | 2 +- library/tcltest/tcltest.tcl | 39 ++++++++++++++++++++++++++++++++++++--- unix/Makefile.in | 4 ++-- win/Makefile.in | 4 ++-- 5 files changed, 42 insertions(+), 9 deletions(-) diff --git a/library/manifest.txt b/library/manifest.txt index cc1e223..5a999f4 100644 --- a/library/manifest.txt +++ b/library/manifest.txt @@ -12,7 +12,7 @@ apply {{dir} { 0 tcl::idna 1.0.1 {cookiejar idna.tcl} 0 platform 1.0.19 {platform platform.tcl} 0 platform::shell 1.1.4 {platform shell.tcl} - 1 tcltest 2.5.5 {tcltest tcltest.tcl} + 1 tcltest 2.5.6 {tcltest tcltest.tcl} } { if {$isafe && !$safe} continue package ifneeded $package $version [list source [file join $dir {*}$file]] diff --git a/library/tcltest/pkgIndex.tcl b/library/tcltest/pkgIndex.tcl index 18b05e5..9903e32 100644 --- a/library/tcltest/pkgIndex.tcl +++ b/library/tcltest/pkgIndex.tcl @@ -9,4 +9,4 @@ # full path name of this file's directory. if {![package vsatisfies [package provide Tcl] 8.5-]} {return} -package ifneeded tcltest 2.5.5 [list source [file join $dir tcltest.tcl]] +package ifneeded tcltest 2.5.6 [list source [file join $dir tcltest.tcl]] diff --git a/library/tcltest/tcltest.tcl b/library/tcltest/tcltest.tcl index 7344f9f..19b7d64 100644 --- a/library/tcltest/tcltest.tcl +++ b/library/tcltest/tcltest.tcl @@ -22,7 +22,7 @@ namespace eval tcltest { # When the version number changes, be sure to update the pkgIndex.tcl file, # and the install directory in the Makefiles. When the minor version # changes (new feature) be sure to update the man page as well. - variable Version 2.5.5 + variable Version 2.5.6 # Compatibility support for dumb variables defined in tcltest 1 # Do not use these. Call [package provide Tcl] and [info patchlevel] @@ -1134,6 +1134,39 @@ proc tcltest::SafeFetch {n1 n2 op} { } } + +# tcltest::Asciify -- +# +# Transforms the passed string to contain only printable ascii characters. +# Useful for printing to terminals. Non-printables are mapped to +# \x, \u or \U sequences. +# +# Arguments: +# s - string to transform +# +# Results: +# The transformed strings +# +# Side effects: +# None. + +proc tcltest::Asciify {s} { + set print "" + foreach c [split $s ""] { + set i [scan $c %c] + if {[string is print $c] && ($i <= 127) && ($i > 0)} { + append print $c + } elseif {$i <= 0xFF} { + append print \\x[format %02X $i] + } elseif {$i <= 0xFFFF} { + append print \\u[format %04X $i] + } else { + append print \\U[format %08X $i] + } + } + return $print +} + # tcltest::ConstraintInitializer -- # # Get or set a script that when evaluated in the tcltest namespace @@ -2221,9 +2254,9 @@ proc tcltest::test {name description args} { if {$scriptCompare} { puts [outputChannel] "---- Error testing result: $scriptMatch" } else { - puts [outputChannel] "---- Result was:\n$actualAnswer" + puts [outputChannel] "---- Result was:\n[Asciify $actualAnswer]" puts [outputChannel] "---- Result should have been\ - ($match matching):\n$result" + ($match matching):\n[Asciify $result]" } } if {$errorCodeFailure} { diff --git a/unix/Makefile.in b/unix/Makefile.in index 1b2718e..da057d8 100644 --- a/unix/Makefile.in +++ b/unix/Makefile.in @@ -1071,9 +1071,9 @@ install-libraries: libraries @echo "Installing package msgcat 1.7.1 as a Tcl Module" @$(INSTALL_DATA) $(TOP_DIR)/library/msgcat/msgcat.tcl \ "$(MODULE_INSTALL_DIR)/8.7/msgcat-1.7.1.tm" - @echo "Installing package tcltest 2.5.5 as a Tcl Module" + @echo "Installing package tcltest 2.5.6 as a Tcl Module" @$(INSTALL_DATA) $(TOP_DIR)/library/tcltest/tcltest.tcl \ - "$(MODULE_INSTALL_DIR)/8.5/tcltest-2.5.5.tm" + "$(MODULE_INSTALL_DIR)/8.5/tcltest-2.5.6.tm" @echo "Installing package platform 1.0.19 as a Tcl Module" @$(INSTALL_DATA) $(TOP_DIR)/library/platform/platform.tcl \ "$(MODULE_INSTALL_DIR)/8.4/platform-1.0.19.tm" diff --git a/win/Makefile.in b/win/Makefile.in index 6d7bb7d..202b860 100644 --- a/win/Makefile.in +++ b/win/Makefile.in @@ -889,8 +889,8 @@ install-libraries: libraries install-tzdata install-msgs done; @echo "Installing package msgcat 1.7.1 as a Tcl Module"; @$(COPY) $(ROOT_DIR)/library/msgcat/msgcat.tcl "$(MODULE_INSTALL_DIR)/8.7/msgcat-1.7.1.tm"; - @echo "Installing package tcltest 2.5.5 as a Tcl Module"; - @$(COPY) $(ROOT_DIR)/library/tcltest/tcltest.tcl "$(MODULE_INSTALL_DIR)/8.5/tcltest-2.5.5.tm"; + @echo "Installing package tcltest 2.5.6 as a Tcl Module"; + @$(COPY) $(ROOT_DIR)/library/tcltest/tcltest.tcl "$(MODULE_INSTALL_DIR)/8.5/tcltest-2.5.6.tm"; @echo "Installing package platform 1.0.19 as a Tcl Module"; @$(COPY) $(ROOT_DIR)/library/platform/platform.tcl "$(MODULE_INSTALL_DIR)/8.4/platform-1.0.19.tm"; @echo "Installing package platform::shell 1.1.4 as a Tcl Module"; -- cgit v0.12 From 8bf2e2ace2224e4066dfe647f47b531591fe8666 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Sun, 12 Mar 2023 11:32:52 +0000 Subject: Forgot that \x00 is not printable anyway --- library/tcltest/tcltest.tcl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/library/tcltest/tcltest.tcl b/library/tcltest/tcltest.tcl index 19b7d64..6cb7d92 100644 --- a/library/tcltest/tcltest.tcl +++ b/library/tcltest/tcltest.tcl @@ -1154,7 +1154,7 @@ proc tcltest::Asciify {s} { set print "" foreach c [split $s ""] { set i [scan $c %c] - if {[string is print $c] && ($i <= 127) && ($i > 0)} { + if {[string is print $c] && ($i <= 127)} { append print $c } elseif {$i <= 0xFF} { append print \\x[format %02X $i] -- cgit v0.12 From eeee744ee2f72edd36c45a3ee07dbbee39f16994 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Sun, 12 Mar 2023 16:10:52 +0000 Subject: Minor bug-fix for utf-32: Only throw exception for codepoints > +U10FFFF if "-strict" is specified. Otherwise replace with 0xFFFD --- generic/tclEncoding.c | 12 +++++------- tests/encoding.test | 6 ++++++ 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 4f334bb..a471fe9 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -2606,19 +2606,17 @@ Utf32ToUtfProc( if ((unsigned)ch > 0x10FFFF) { ch = 0xFFFD; - if (STOPONERROR) { + if ((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT) { result = TCL_CONVERT_SYNTAX; break; } } else if (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT) && ((ch & ~0x7FF) == 0xD800)) { - if (STOPONERROR) { - result = TCL_CONVERT_SYNTAX; + result = TCL_CONVERT_SYNTAX; #if TCL_UTF_MAX < 4 - ch = 0; + ch = 0; #endif - break; - } + break; } /* @@ -2850,7 +2848,7 @@ Utf16ToUtfProc( if (((prev & ~0x3FF) == 0xD800) && ((ch & ~0x3FF) != 0xDC00)) { if (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT)) { result = TCL_CONVERT_UNKNOWN; - src -= 2; /* Go back to before the high surrogate */ + src -= 2; /* Go back to beginning of high surrogate */ dst--; /* Also undo writing a single byte too much */ numChars--; break; diff --git a/tests/encoding.test b/tests/encoding.test index 68b5dcd..c8f34ba 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -569,6 +569,12 @@ test encoding-16.22 {Utf16ToUtfProc, strict, bug [db7a085bd9]} -body { test encoding-16.23 {Utf16ToUtfProc, strict, bug [db7a085bd9]} -body { encoding convertfrom -strict utf-16le \x00\xDC } -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\x00'} +test encoding-16.24 {Utf32ToUtfProc} -body { + encoding convertfrom utf-32 "\xFF\xFF\xFF\xFF" +} -result \uFFFD +test encoding-16.25 {Utf32ToUtfProc} -body { + encoding convertfrom utf-32 "\x01\x00\x00\x01" +} -result \uFFFD test encoding-17.1 {UtfToUtf16Proc} -body { encoding convertto utf-16 "\U460DC" -- cgit v0.12 From aee8588fbcee145de5cf3012f7c0c60277fb5394 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Sun, 12 Mar 2023 16:23:33 +0000 Subject: 2 more testcases, proving utf-32 handling of surrogates (actually: not handling!) is OK --- tests/encoding.test | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/encoding.test b/tests/encoding.test index c8f34ba..5e8d3f7 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -606,6 +606,12 @@ test encoding-17.9 {Utf32ToUtfProc} -body { test encoding-17.10 {Utf32ToUtfProc} -body { encoding convertfrom -nocomplain utf-32 "\xFF\xFF\xFF\xFF" } -result \uFFFD +test encoding-17.11 {Utf32ToUtfProc} -body { + encoding convertfrom -strict utf-32le "\x00\xD8\x00\x00" +} -returnCodes error -result {unexpected byte sequence starting at index 0: '\x00'} +test encoding-17.12 {Utf32ToUtfProc} -body { + encoding convertfrom -strict utf-32le "\x00\xDC\x00\x00" +} -returnCodes error -result {unexpected byte sequence starting at index 0: '\x00'} test encoding-18.1 {TableToUtfProc on invalid input} -body { list [catch {encoding convertto jis0208 \\} res] $res -- cgit v0.12 From 131176ce2f937173892c6e7e3a78978f6e8da2b5 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Sun, 12 Mar 2023 16:37:12 +0000 Subject: Backport [6fb14ee3e876978c]. Add testcases --- generic/tclEncoding.c | 12 +++++------- tests/encoding.test | 12 ++++++++++++ 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index b3409d6..27f11d8 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -2605,17 +2605,15 @@ Utf32ToUtfProc( if ((unsigned)ch > 0x10FFFF) { ch = 0xFFFD; - if (STOPONERROR) { + if ((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT) { result = TCL_CONVERT_SYNTAX; break; } } else if (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT) && ((ch & ~0x7FF) == 0xD800)) { - if (STOPONERROR) { - result = TCL_CONVERT_SYNTAX; - ch = 0; - break; - } + result = TCL_CONVERT_SYNTAX; + ch = 0; + break; } /* @@ -2845,7 +2843,7 @@ Utf16ToUtfProc( if (((prev & ~0x3FF) == 0xD800) && ((ch & ~0x3FF) != 0xDC00)) { if (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT)) { result = TCL_CONVERT_UNKNOWN; - src -= 2; /* Go back to before the high surrogate */ + src -= 2; /* Go back to beginning of high surrogate */ dst--; /* Also undo writing a single byte too much */ numChars--; break; diff --git a/tests/encoding.test b/tests/encoding.test index 0fe64ce..cf63211 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -568,6 +568,12 @@ test encoding-16.22 {Utf16ToUtfProc, strict, bug [db7a085bd9]} -body { test encoding-16.23 {Utf16ToUtfProc, strict, bug [db7a085bd9]} -body { encoding convertfrom -strict utf-16le \x00\xDC } -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\x00'} +test encoding-16.24 {Utf32ToUtfProc} -body { + encoding convertfrom utf-32 "\xFF\xFF\xFF\xFF" +} -result \uFFFD +test encoding-16.25 {Utf32ToUtfProc} -body { + encoding convertfrom utf-32 "\x01\x00\x00\x01" +} -result \uFFFD test encoding-17.1 {UtfToUtf16Proc} -body { encoding convertto utf-16 "\U460DC" @@ -599,6 +605,12 @@ test encoding-17.9 {Utf32ToUtfProc} -body { test encoding-17.10 {Utf32ToUtfProc} -body { encoding convertfrom -nocomplain utf-32 "\xFF\xFF\xFF\xFF" } -result \uFFFD +test encoding-17.11 {Utf32ToUtfProc} -body { + encoding convertfrom -strict utf-32le "\x00\xD8\x00\x00" +} -returnCodes error -result {unexpected byte sequence starting at index 0: '\x00'} +test encoding-17.12 {Utf32ToUtfProc} -body { + encoding convertfrom -strict utf-32le "\x00\xDC\x00\x00" +} -returnCodes error -result {unexpected byte sequence starting at index 0: '\x00'} test encoding-18.1 {TableToUtfProc on invalid input} -constraints deprecated -body { list [catch {encoding convertto jis0208 \\} res] $res -- cgit v0.12 From 22239fb7d2e4d9fae7bc87076d655170b791c46b Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Sun, 12 Mar 2023 16:47:08 +0000 Subject: Start on Tcl_ExternalToUtf/Tcl_UtfToExternal tests --- generic/tclTest.c | 124 +++++++++++++++++++++++++++++++++++++++++++++--------- tests/utfext.test | 96 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 199 insertions(+), 21 deletions(-) create mode 100644 tests/utfext.test diff --git a/generic/tclTest.c b/generic/tclTest.c index a398797..eab3eab 100644 --- a/generic/tclTest.c +++ b/generic/tclTest.c @@ -2032,12 +2032,21 @@ static void SpecialFree( * result of Tcl_UtfToExternal or Tcl_ExternalToUtf. * * Side effects: + * * The result in the interpreter is a list of the return code from the * Tcl_UtfToExternal/Tcl_ExternalToUtf functions, the encoding state, and - * the encoded binary string. If any of the srcreadvar, dstwrotevar and + * an encoded binary string of length dstLen. Note the string is the + * entire output buffer, not just the part containing the decoded + * portion. This allows for additional checks at test script level. + * + * If any of the srcreadvar, dstwrotevar and * dstcharsvar are specified and not empty, they are treated as names * of variables where the *srcRead, *dstWrote and *dstChars output * from the functions are stored. + * + * The function also checks internally whether nuls are correctly + * appended as requested but the TCL_ENCODING_NO_TERMINATE flag + * and that no buffer overflows occur. *------------------------------------------------------------------------ */ typedef int @@ -2049,13 +2058,15 @@ static int UtfExtWrapper( Tcl_Encoding encoding; int encStateValue; /* Assumes Tcl_EncodingState points to integer!!! */ Tcl_EncodingState encState; - int flags; Tcl_Size srcLen, bufLen; const unsigned char *bytes; unsigned char *bufPtr; int srcRead, dstLen, dstWrote, dstChars; Tcl_Obj *srcReadVar, *dstWroteVar, *dstCharsVar; int result; + int flags; + Tcl_Obj **flagObjs; + int nflags; if (objc < 7 || objc > 10) { Tcl_WrongNumArgs(interp, @@ -2067,9 +2078,48 @@ static int UtfExtWrapper( if (Tcl_GetEncodingFromObj(interp, objv[2], &encoding) != TCL_OK) { return TCL_ERROR; } - if (Tcl_GetIntFromObj(interp, objv[4], &flags) != TCL_OK) { - return TCL_ERROR; + + /* Flags may be specified as list of integers and keywords */ + flags = 0; + if (Tcl_ListObjGetElements(interp, objv[4], &nflags, &flagObjs) != TCL_OK) { + return TCL_ERROR; + } + + struct { + const char *flagKey; + int flag; + } flagMap[] = { + {"start", TCL_ENCODING_START}, + {"end", TCL_ENCODING_END}, + {"stoponerror", TCL_ENCODING_STOPONERROR}, + {"noterminate", TCL_ENCODING_NO_TERMINATE}, + {"charlimit", TCL_ENCODING_CHAR_LIMIT}, + {"profiletcl8", TCL_ENCODING_PROFILE_TCL8}, + {"profilestrict", TCL_ENCODING_PROFILE_STRICT}, + {"profilereplace", TCL_ENCODING_PROFILE_REPLACE}, + {NULL, 0} + }; + int i; + for (i = 0; i < nflags; ++i) { + int flag; + if (Tcl_GetIntFromObj(NULL, flagObjs[i], &flag) == TCL_OK) { + flags |= flag; + } + else { + int idx; + if (Tcl_GetIndexFromObjStruct(interp, + flagObjs[i], + flagMap, + sizeof(flagMap[0]), + "flag", + 0, + &idx) != TCL_OK) { + return TCL_ERROR; + } + flags |= flagMap[idx].flag; + } } + /* Assumes state is integer if not "" */ if (Tcl_GetIntFromObj(interp, objv[5], &encStateValue) == TCL_OK) { encState = (Tcl_EncodingState)&encStateValue; @@ -2097,27 +2147,47 @@ static int UtfExtWrapper( if (objc > 9) { if (Tcl_GetCharLength(objv[9])) { dstCharsVar = objv[9]; - } + } } } } + if (flags & TCL_ENCODING_CHAR_LIMIT) { + /* Caller should have specified the dest char limit */ + Tcl_Obj *valueObj; + if (dstCharsVar == NULL || + (valueObj = Tcl_ObjGetVar2(interp, dstCharsVar, NULL, 0)) == NULL + ) { + Tcl_SetResult(interp, + "dstCharsVar must be specified with integer value if " + "TCL_ENCODING_CHAR_LIMIT set in flags.", TCL_STATIC); + return TCL_ERROR; + } + if (Tcl_GetIntFromObj(interp, dstCharsVar, &dstChars) != TCL_OK) { + return TCL_ERROR; + } + } else { + dstChars = 0; /* Only used for output */ + } bufLen = dstLen + 4; /* 4 -> overflow detection */ bufPtr = ckalloc(bufLen); - memmove(bufPtr + dstLen, "\xAB\xCD\xEF\x00", 4); /* overflow detection */ + memset(bufPtr, 0xFF, dstLen); /* Need to check nul terminator */ + memmove(bufPtr + dstLen, "\xAB\xCD\xEF\xAB", 4); /* overflow detection */ bytes = Tcl_GetByteArrayFromObj(objv[3], &srcLen); /* Last! to avoid shimmering */ result = (*transformer)(interp, encoding, bytes, srcLen, flags, &encState, bufPtr, dstLen, srcReadVar ? &srcRead : NULL, &dstWrote, dstCharsVar ? &dstChars : NULL); - if (memcmp(bufPtr + bufLen - 4, "\xAB\xCD\xEF\x00", 4)) { + if (memcmp(bufPtr + bufLen - 4, "\xAB\xCD\xEF\xAB", 4)) { Tcl_SetResult(interp, "Tcl_ExternalToUtf wrote past output buffer", TCL_STATIC); result = TCL_ERROR; - } else { + } else if (result != TCL_ERROR) { + Tcl_Obj *resultObjs[3]; + switch (result) { case TCL_OK: resultObjs[0] = Tcl_NewStringObj("ok", -1); @@ -2141,22 +2211,34 @@ static int UtfExtWrapper( result = TCL_OK; resultObjs[1] = encState ? Tcl_NewIntObj(encStateValue) : Tcl_NewObj(); - resultObjs[2] = Tcl_NewByteArrayObj(bufPtr, dstWrote); + resultObjs[2] = Tcl_NewByteArrayObj(bufPtr, dstLen); if (srcReadVar) { - if (Tcl_ObjSetVar2(interp, srcReadVar, NULL, Tcl_NewIntObj(srcRead), 0) == NULL) { - result = TCL_ERROR; - } - } + if (Tcl_ObjSetVar2(interp, + srcReadVar, + NULL, + Tcl_NewIntObj(srcRead), + TCL_LEAVE_ERR_MSG) == NULL) { + result = TCL_ERROR; + } + } if (dstWroteVar) { - if (Tcl_ObjSetVar2(interp, dstWroteVar, NULL, Tcl_NewIntObj(dstWrote), 0) == NULL) { - result = TCL_ERROR; - } - } + if (Tcl_ObjSetVar2(interp, + dstWroteVar, + NULL, + Tcl_NewIntObj(dstWrote), + TCL_LEAVE_ERR_MSG) == NULL) { + result = TCL_ERROR; + } + } if (dstCharsVar) { - if (Tcl_ObjSetVar2(interp, dstCharsVar, NULL, Tcl_NewIntObj(dstChars), 0) == NULL) { - result = TCL_ERROR; - } - } + if (Tcl_ObjSetVar2(interp, + dstCharsVar, + NULL, + Tcl_NewIntObj(dstChars), + TCL_LEAVE_ERR_MSG) == NULL) { + result = TCL_ERROR; + } + } Tcl_SetObjResult(interp, Tcl_NewListObj(3, resultObjs)); } diff --git a/tests/utfext.test b/tests/utfext.test new file mode 100644 index 0000000..61e36b8 --- /dev/null +++ b/tests/utfext.test @@ -0,0 +1,96 @@ +# This file contains a collection of tests for Tcl_UtfToExternal and +# Tcl_UtfToExternal. Sourcing this file into Tcl runs the tests and generates +# errors. No output means no errors found. +# +# Copyright (c) 2023 Ashok P. Nadkarni +# +# See the file "license.terms" for information on usage and redistribution +# of this file, and for a DISCLAIMER OF ALL WARRANTIES. + +if {"::tcltest" ni [namespace children]} { + package require tcltest 2.5 + namespace import -force ::tcltest::* +} + +::tcltest::loadTestedCommands +catch [list package require -exact tcl::test [info patchlevel]] + +testConstraint testbytestring [llength [info commands testbytestring]] +testConstraint testencoding [llength [info commands testencoding]] + +# Maps encoded bytes string to utf-8 equivalents, both in hex +# encoding utf-8 encdata +lappend utfExtMap {*}{ + ascii 414243 414243 +} + +if {[info commands printable] eq ""} { + proc printable {s} { + set print "" + foreach c [split $s ""] { + set i [scan $c %c] + if {[string is print $c] && ($i <= 127)} { + append print $c + } elseif {$i <= 0xff} { + append print \\x[format %02X $i] + } elseif {$i <= 0xffff} { + append print \\u[format %04X $i] + } else { + append print \\U[format %08X $i] + } + } + return $print + } +} + +# Simple test with basic flags +proc testbasic {direction enc hexin hexout {flags {start end}}} { + if {$direction eq "toutf"} { + set cmd Tcl_ExternalToUtf + } else { + set cmd Tcl_UtfToExternal + } + set in [binary decode hex $hexin] + set out [binary decode hex $hexout] + set dstlen 40 ;# Should be enough for all encoding tests + + # The C wrapper fills entire destination buffer with FF. + # Anything beyond expected output should have FF's + set filler [string repeat \xFF $dstlen] + set result [string range "$out$filler" 0 $dstlen-1] + test $cmd-$enc-$hexin-[join $flags -] "$cmd - $enc - $hexin - $flags" -body \ + [list testencoding $cmd $enc $in $flags {} $dstlen] \ + -result [list ok {} $result] + foreach profile [encoding profiles] { + set flags2 [linsert $flags end profile$profile] + test $cmd-$enc-$hexin-[join $flags2 -] "$cmd - $enc - $hexin - $flags" -body \ + [list testencoding $cmd $enc $in $flags2 {} $dstlen] \ + -result [list ok {} $result] + } +} + +# +# Basic tests +foreach {enc utfhex hex} $utfExtMap { + # Basic test - TCL_ENCODING_START|TCL_ENCODING_END + # Note by default output should be terminated with \0 + testbasic toutf $enc $hex ${utfhex}00 {start end} + testbasic fromutf $enc $utfhex ${hex}00 {start end} + + # Test TCL_ENCODING_NO_TERMINATE + testbasic toutf $enc $hex $utfhex {start end noterminate} + # knownBug - noterminate not obeyed by fromutf + # testbasic fromutf $enc $utfhex $hex {start end noterminate} +} + +# Test for insufficient space +test xx-bufferoverflow {buffer overflow Tcl_ExternalToUtf} -body { + testencoding Tcl_UtfToExternal unicode A {start end} {} 1 +} -result {nospace {} {}} + +::tcltest::cleanupTests +return + +# Local Variables: +# mode: tcl +# End: -- cgit v0.12 From 844792f3bb8eea9124be41d436c7462f1daa19b9 Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Sun, 12 Mar 2023 17:10:51 +0000 Subject: Cherrypick yip-656. Start on Tcl_ExternalToUtf/Tcl_UtfToExternal tests --- generic/tclTest.c | 124 +++++++++++++++++++++++++++++++++++++++++++++--------- tests/utfext.test | 96 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 199 insertions(+), 21 deletions(-) create mode 100644 tests/utfext.test diff --git a/generic/tclTest.c b/generic/tclTest.c index 33205fb..e2d8b3b 100644 --- a/generic/tclTest.c +++ b/generic/tclTest.c @@ -1996,12 +1996,21 @@ static void SpecialFree( * result of Tcl_UtfToExternal or Tcl_ExternalToUtf. * * Side effects: + * * The result in the interpreter is a list of the return code from the * Tcl_UtfToExternal/Tcl_ExternalToUtf functions, the encoding state, and - * the encoded binary string. If any of the srcreadvar, dstwrotevar and + * an encoded binary string of length dstLen. Note the string is the + * entire output buffer, not just the part containing the decoded + * portion. This allows for additional checks at test script level. + * + * If any of the srcreadvar, dstwrotevar and * dstcharsvar are specified and not empty, they are treated as names * of variables where the *srcRead, *dstWrote and *dstChars output * from the functions are stored. + * + * The function also checks internally whether nuls are correctly + * appended as requested but the TCL_ENCODING_NO_TERMINATE flag + * and that no buffer overflows occur. *------------------------------------------------------------------------ */ typedef int @@ -2013,13 +2022,15 @@ static int UtfExtWrapper( Tcl_Encoding encoding; int encStateValue; /* Assumes Tcl_EncodingState points to integer!!! */ Tcl_EncodingState encState; - int flags; Tcl_Size srcLen, bufLen; const unsigned char *bytes; unsigned char *bufPtr; int srcRead, dstLen, dstWrote, dstChars; Tcl_Obj *srcReadVar, *dstWroteVar, *dstCharsVar; int result; + int flags; + Tcl_Obj **flagObjs; + int nflags; if (objc < 7 || objc > 10) { Tcl_WrongNumArgs(interp, @@ -2031,9 +2042,48 @@ static int UtfExtWrapper( if (Tcl_GetEncodingFromObj(interp, objv[2], &encoding) != TCL_OK) { return TCL_ERROR; } - if (Tcl_GetIntFromObj(interp, objv[4], &flags) != TCL_OK) { - return TCL_ERROR; + + /* Flags may be specified as list of integers and keywords */ + flags = 0; + if (Tcl_ListObjGetElements(interp, objv[4], &nflags, &flagObjs) != TCL_OK) { + return TCL_ERROR; + } + + struct { + const char *flagKey; + int flag; + } flagMap[] = { + {"start", TCL_ENCODING_START}, + {"end", TCL_ENCODING_END}, + {"stoponerror", TCL_ENCODING_STOPONERROR}, + {"noterminate", TCL_ENCODING_NO_TERMINATE}, + {"charlimit", TCL_ENCODING_CHAR_LIMIT}, + {"profiletcl8", TCL_ENCODING_PROFILE_TCL8}, + {"profilestrict", TCL_ENCODING_PROFILE_STRICT}, + {"profilereplace", TCL_ENCODING_PROFILE_REPLACE}, + {NULL, 0} + }; + int i; + for (i = 0; i < nflags; ++i) { + int flag; + if (Tcl_GetIntFromObj(NULL, flagObjs[i], &flag) == TCL_OK) { + flags |= flag; + } + else { + int idx; + if (Tcl_GetIndexFromObjStruct(interp, + flagObjs[i], + flagMap, + sizeof(flagMap[0]), + "flag", + 0, + &idx) != TCL_OK) { + return TCL_ERROR; + } + flags |= flagMap[idx].flag; + } } + /* Assumes state is integer if not "" */ if (Tcl_GetIntFromObj(interp, objv[5], &encStateValue) == TCL_OK) { encState = (Tcl_EncodingState)&encStateValue; @@ -2061,27 +2111,47 @@ static int UtfExtWrapper( if (objc > 9) { if (Tcl_GetCharLength(objv[9])) { dstCharsVar = objv[9]; - } + } } } } + if (flags & TCL_ENCODING_CHAR_LIMIT) { + /* Caller should have specified the dest char limit */ + Tcl_Obj *valueObj; + if (dstCharsVar == NULL || + (valueObj = Tcl_ObjGetVar2(interp, dstCharsVar, NULL, 0)) == NULL + ) { + Tcl_SetResult(interp, + "dstCharsVar must be specified with integer value if " + "TCL_ENCODING_CHAR_LIMIT set in flags.", TCL_STATIC); + return TCL_ERROR; + } + if (Tcl_GetIntFromObj(interp, dstCharsVar, &dstChars) != TCL_OK) { + return TCL_ERROR; + } + } else { + dstChars = 0; /* Only used for output */ + } bufLen = dstLen + 4; /* 4 -> overflow detection */ bufPtr = Tcl_Alloc(bufLen); - memmove(bufPtr + dstLen, "\xAB\xCD\xEF\x00", 4); /* overflow detection */ + memset(bufPtr, 0xFF, dstLen); /* Need to check nul terminator */ + memmove(bufPtr + dstLen, "\xAB\xCD\xEF\xAB", 4); /* overflow detection */ bytes = Tcl_GetByteArrayFromObj(objv[3], &srcLen); /* Last! to avoid shimmering */ result = (*transformer)(interp, encoding, bytes, srcLen, flags, &encState, bufPtr, dstLen, srcReadVar ? &srcRead : NULL, &dstWrote, dstCharsVar ? &dstChars : NULL); - if (memcmp(bufPtr + bufLen - 4, "\xAB\xCD\xEF\x00", 4)) { + if (memcmp(bufPtr + bufLen - 4, "\xAB\xCD\xEF\xAB", 4)) { Tcl_SetResult(interp, "Tcl_ExternalToUtf wrote past output buffer", TCL_STATIC); result = TCL_ERROR; - } else { + } else if (result != TCL_ERROR) { + Tcl_Obj *resultObjs[3]; + switch (result) { case TCL_OK: resultObjs[0] = Tcl_NewStringObj("ok", -1); @@ -2105,22 +2175,34 @@ static int UtfExtWrapper( result = TCL_OK; resultObjs[1] = encState ? Tcl_NewIntObj(encStateValue) : Tcl_NewObj(); - resultObjs[2] = Tcl_NewByteArrayObj(bufPtr, dstWrote); + resultObjs[2] = Tcl_NewByteArrayObj(bufPtr, dstLen); if (srcReadVar) { - if (Tcl_ObjSetVar2(interp, srcReadVar, NULL, Tcl_NewIntObj(srcRead), 0) == NULL) { - result = TCL_ERROR; - } - } + if (Tcl_ObjSetVar2(interp, + srcReadVar, + NULL, + Tcl_NewIntObj(srcRead), + TCL_LEAVE_ERR_MSG) == NULL) { + result = TCL_ERROR; + } + } if (dstWroteVar) { - if (Tcl_ObjSetVar2(interp, dstWroteVar, NULL, Tcl_NewIntObj(dstWrote), 0) == NULL) { - result = TCL_ERROR; - } - } + if (Tcl_ObjSetVar2(interp, + dstWroteVar, + NULL, + Tcl_NewIntObj(dstWrote), + TCL_LEAVE_ERR_MSG) == NULL) { + result = TCL_ERROR; + } + } if (dstCharsVar) { - if (Tcl_ObjSetVar2(interp, dstCharsVar, NULL, Tcl_NewIntObj(dstChars), 0) == NULL) { - result = TCL_ERROR; - } - } + if (Tcl_ObjSetVar2(interp, + dstCharsVar, + NULL, + Tcl_NewIntObj(dstChars), + TCL_LEAVE_ERR_MSG) == NULL) { + result = TCL_ERROR; + } + } Tcl_SetObjResult(interp, Tcl_NewListObj(3, resultObjs)); } diff --git a/tests/utfext.test b/tests/utfext.test new file mode 100644 index 0000000..61e36b8 --- /dev/null +++ b/tests/utfext.test @@ -0,0 +1,96 @@ +# This file contains a collection of tests for Tcl_UtfToExternal and +# Tcl_UtfToExternal. Sourcing this file into Tcl runs the tests and generates +# errors. No output means no errors found. +# +# Copyright (c) 2023 Ashok P. Nadkarni +# +# See the file "license.terms" for information on usage and redistribution +# of this file, and for a DISCLAIMER OF ALL WARRANTIES. + +if {"::tcltest" ni [namespace children]} { + package require tcltest 2.5 + namespace import -force ::tcltest::* +} + +::tcltest::loadTestedCommands +catch [list package require -exact tcl::test [info patchlevel]] + +testConstraint testbytestring [llength [info commands testbytestring]] +testConstraint testencoding [llength [info commands testencoding]] + +# Maps encoded bytes string to utf-8 equivalents, both in hex +# encoding utf-8 encdata +lappend utfExtMap {*}{ + ascii 414243 414243 +} + +if {[info commands printable] eq ""} { + proc printable {s} { + set print "" + foreach c [split $s ""] { + set i [scan $c %c] + if {[string is print $c] && ($i <= 127)} { + append print $c + } elseif {$i <= 0xff} { + append print \\x[format %02X $i] + } elseif {$i <= 0xffff} { + append print \\u[format %04X $i] + } else { + append print \\U[format %08X $i] + } + } + return $print + } +} + +# Simple test with basic flags +proc testbasic {direction enc hexin hexout {flags {start end}}} { + if {$direction eq "toutf"} { + set cmd Tcl_ExternalToUtf + } else { + set cmd Tcl_UtfToExternal + } + set in [binary decode hex $hexin] + set out [binary decode hex $hexout] + set dstlen 40 ;# Should be enough for all encoding tests + + # The C wrapper fills entire destination buffer with FF. + # Anything beyond expected output should have FF's + set filler [string repeat \xFF $dstlen] + set result [string range "$out$filler" 0 $dstlen-1] + test $cmd-$enc-$hexin-[join $flags -] "$cmd - $enc - $hexin - $flags" -body \ + [list testencoding $cmd $enc $in $flags {} $dstlen] \ + -result [list ok {} $result] + foreach profile [encoding profiles] { + set flags2 [linsert $flags end profile$profile] + test $cmd-$enc-$hexin-[join $flags2 -] "$cmd - $enc - $hexin - $flags" -body \ + [list testencoding $cmd $enc $in $flags2 {} $dstlen] \ + -result [list ok {} $result] + } +} + +# +# Basic tests +foreach {enc utfhex hex} $utfExtMap { + # Basic test - TCL_ENCODING_START|TCL_ENCODING_END + # Note by default output should be terminated with \0 + testbasic toutf $enc $hex ${utfhex}00 {start end} + testbasic fromutf $enc $utfhex ${hex}00 {start end} + + # Test TCL_ENCODING_NO_TERMINATE + testbasic toutf $enc $hex $utfhex {start end noterminate} + # knownBug - noterminate not obeyed by fromutf + # testbasic fromutf $enc $utfhex $hex {start end noterminate} +} + +# Test for insufficient space +test xx-bufferoverflow {buffer overflow Tcl_ExternalToUtf} -body { + testencoding Tcl_UtfToExternal unicode A {start end} {} 1 +} -result {nospace {} {}} + +::tcltest::cleanupTests +return + +# Local Variables: +# mode: tcl +# End: -- cgit v0.12 From 967e55306e7bb0a58a7cf2c5b905a2608f395875 Mon Sep 17 00:00:00 2001 From: pooryorick Date: Mon, 13 Mar 2023 12:22:06 +0000 Subject: Fix for issue [ea69b0258a9833cb], crash when using a channel transformation on TCP client socket. --- generic/tclIO.c | 38 ++++++++++++++---------- tests/ioTrans.test | 86 +++++++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 107 insertions(+), 17 deletions(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index 85ff39b..715f8c7 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -8551,6 +8551,7 @@ UpdateInterest( mask &= ~TCL_EXCEPTION; if (!statePtr->timer) { + TclChannelPreserve((Tcl_Channel)chanPtr); statePtr->timer = Tcl_CreateTimerHandler(SYNTHETIC_EVENT_TIME, ChannelTimerProc, chanPtr); } @@ -8584,23 +8585,28 @@ ChannelTimerProc( ChannelState *statePtr = chanPtr->state; /* State info for channel */ - if (!GotFlag(statePtr, CHANNEL_NEED_MORE_DATA) - && (statePtr->interestMask & TCL_READABLE) - && (statePtr->inQueueHead != NULL) - && IsBufferReady(statePtr->inQueueHead)) { - /* - * Restart the timer in case a channel handler reenters the event loop - * before UpdateInterest gets called by Tcl_NotifyChannel. - */ - - statePtr->timer = Tcl_CreateTimerHandler(SYNTHETIC_EVENT_TIME, - ChannelTimerProc,chanPtr); - Tcl_Preserve(statePtr); - Tcl_NotifyChannel((Tcl_Channel) chanPtr, TCL_READABLE); - Tcl_Release(statePtr); + if (chanPtr->typePtr == NULL) { + TclChannelRelease((Tcl_Channel)chanPtr); } else { - statePtr->timer = NULL; - UpdateInterest(chanPtr); + if (!GotFlag(statePtr, CHANNEL_NEED_MORE_DATA) + && (statePtr->interestMask & TCL_READABLE) + && (statePtr->inQueueHead != NULL) + && IsBufferReady(statePtr->inQueueHead)) { + /* + * Restart the timer in case a channel handler reenters the event loop + * before UpdateInterest gets called by Tcl_NotifyChannel. + */ + + statePtr->timer = Tcl_CreateTimerHandler(SYNTHETIC_EVENT_TIME, + ChannelTimerProc,chanPtr); + Tcl_Preserve(statePtr); + Tcl_NotifyChannel((Tcl_Channel) chanPtr, TCL_READABLE); + Tcl_Release(statePtr); + } else { + statePtr->timer = NULL; + UpdateInterest(chanPtr); + TclChannelRelease((Tcl_Channel)chanPtr); + } } } diff --git a/tests/ioTrans.test b/tests/ioTrans.test index f185117..130ff80 100644 --- a/tests/ioTrans.test +++ b/tests/ioTrans.test @@ -634,6 +634,58 @@ test iortrans-4.9 {chan read, gets, bug 2921116} -setup { } } + + +namespace eval reflector { + proc initialize {_ chan mode} { + return {initialize finalize watch read} + } + + + proc finalize {_ chan} { + namespace delete $_ + } + + + proc read {_ chan count} { + namespace upvar $_ source source + set res [string range $source 0 $count-1] + set source [string range $source $count end] + return $res + } + + + proc watch {_ chan events} { + after 0 [list chan postevent $chan read] + return read + } + + namespace ensemble create -parameters _ + namespace export * +} + + + + +namespace eval inputfilter { + proc initialize {chan mode} { + return {initialize finalize read} + } + + proc read {chan buffer} { + return $buffer + } + + proc finalize chan { + namespace delete $chan + } + + namespace ensemble create + namespace export * +} + + + # Channel read transform that is just the identity - pass all through proc idxform {cmd handle args} { switch -- $cmd { @@ -2089,7 +2141,39 @@ test iortrans.tf-11.1 {origin thread of moved transform destroyed during access} thread::release $tidb } -result {Owner lost} -# ### ### ### ######### ######### ######### + +test iortrans-ea69b0258a9833cb { + Crash when using a channel transformation on TCP client socket + + "line two" does not make it into result. This issue should probably be + addressed, but it is outside the scope of this test. +} -setup { + set res {} + set read 0 +} -body { + namespace eval reflector1 { + variable source "line one\nline two" + interp alias {} [namespace current]::dispatch {} [ + namespace parent]::reflector [namespace current] + } + set chan [chan create read [namespace which reflector1::dispatch]] + chan configure $chan -blocking 0 + chan push $chan inputfilter + chan event $chan read [list ::apply [list chan { + variable res + variable read + set gets [gets $chan] + append res $gets + incr read + } [namespace current]] $chan] + vwait [namespace current]::read + chan pop $chan + vwait [namespace current]::read + return $res +} -cleanup { + catch {unset read} + close $chan +} -result {line one} cleanupTests return -- cgit v0.12 From df8a3a6ea4a6ede9d9be56eacae69d8f40d624ca Mon Sep 17 00:00:00 2001 From: pooryorick Date: Mon, 13 Mar 2023 13:36:52 +0000 Subject: Fix for issue [ea69b0258a9833cb], crash when using a channel transformation on TCP client socket. --- generic/tclIO.c | 67 +++++++++++++++++++++++++----------------- tests/ioTrans.test | 86 +++++++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 125 insertions(+), 28 deletions(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index da06171..58137a5 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -8804,6 +8804,7 @@ UpdateInterest( mask &= ~TCL_EXCEPTION; if (!statePtr->timer) { + TclChannelPreserve((Tcl_Channel)chanPtr); statePtr->timer = Tcl_CreateTimerHandler(SYNTHETIC_EVENT_TIME, ChannelTimerProc, chanPtr); } @@ -8814,6 +8815,7 @@ UpdateInterest( && mask & TCL_WRITABLE && GotFlag(statePtr, CHANNEL_NONBLOCKING)) { + TclChannelPreserve((Tcl_Channel)chanPtr); statePtr->timer = Tcl_CreateTimerHandler(SYNTHETIC_EVENT_TIME, ChannelTimerProc,chanPtr); } @@ -8848,44 +8850,55 @@ ChannelTimerProc( /* State info for channel */ ChannelState *statePtr = chanPtr->state; - /* Preserve chanPtr to guard against deallocation in Tcl_NotifyChannel. */ - TclChannelPreserve((Tcl_Channel)chanPtr); - Tcl_Preserve(statePtr); - statePtr->timer = NULL; - if (statePtr->interestMask & TCL_WRITABLE - && GotFlag(statePtr, CHANNEL_NONBLOCKING) - && !GotFlag(statePtr, BG_FLUSH_SCHEDULED) - ) { - /* - * Restart the timer in case a channel handler reenters the event loop - * before UpdateInterest gets called by Tcl_NotifyChannel. - */ - statePtr->timer = Tcl_CreateTimerHandler(SYNTHETIC_EVENT_TIME, - ChannelTimerProc,chanPtr); - Tcl_NotifyChannel((Tcl_Channel) chanPtr, TCL_WRITABLE); - } + /* TclChannelPreserve() must be called before the current function was + * scheduled, is already in effect. In this function it guards against + * deallocation in Tcl_NotifyChannel and also keps the channel preserved + * until ChannelTimerProc is later called again. + */ - /* The channel may have just been closed from within Tcl_NotifyChannel */ - if (!GotFlag(statePtr, CHANNEL_INCLOSE)) { - if (!GotFlag(statePtr, CHANNEL_NEED_MORE_DATA) - && (statePtr->interestMask & TCL_READABLE) - && (statePtr->inQueueHead != NULL) - && IsBufferReady(statePtr->inQueueHead)) { + if (chanPtr->typePtr == NULL) { + TclChannelRelease((Tcl_Channel)chanPtr); + } else { + Tcl_Preserve(statePtr); + statePtr->timer = NULL; + if (statePtr->interestMask & TCL_WRITABLE + && GotFlag(statePtr, CHANNEL_NONBLOCKING) + && !GotFlag(statePtr, BG_FLUSH_SCHEDULED) + ) { /* * Restart the timer in case a channel handler reenters the event loop * before UpdateInterest gets called by Tcl_NotifyChannel. */ - statePtr->timer = Tcl_CreateTimerHandler(SYNTHETIC_EVENT_TIME, ChannelTimerProc,chanPtr); - Tcl_NotifyChannel((Tcl_Channel) chanPtr, TCL_READABLE); + Tcl_NotifyChannel((Tcl_Channel) chanPtr, TCL_WRITABLE); } else { - UpdateInterest(chanPtr); + /* The channel may have just been closed from within Tcl_NotifyChannel */ + if (!GotFlag(statePtr, CHANNEL_INCLOSE)) { + if (!GotFlag(statePtr, CHANNEL_NEED_MORE_DATA) + && (statePtr->interestMask & TCL_READABLE) + && (statePtr->inQueueHead != NULL) + && IsBufferReady(statePtr->inQueueHead)) { + /* + * Restart the timer in case a channel handler reenters the event loop + * before UpdateInterest gets called by Tcl_NotifyChannel. + */ + + statePtr->timer = Tcl_CreateTimerHandler(SYNTHETIC_EVENT_TIME, + ChannelTimerProc,chanPtr); + Tcl_NotifyChannel((Tcl_Channel) chanPtr, TCL_READABLE); + } else { + TclChannelRelease((Tcl_Channel)chanPtr); + UpdateInterest(chanPtr); + } + } else { + TclChannelRelease((Tcl_Channel)chanPtr); + } } + + Tcl_Release(statePtr); } - Tcl_Release(statePtr); - TclChannelRelease((Tcl_Channel)chanPtr); } /* diff --git a/tests/ioTrans.test b/tests/ioTrans.test index 79493e0..f481a17 100644 --- a/tests/ioTrans.test +++ b/tests/ioTrans.test @@ -634,6 +634,58 @@ test iortrans-4.9 {chan read, gets, bug 2921116} -setup { } } + + +namespace eval reflector { + proc initialize {_ chan mode} { + return {initialize finalize watch read} + } + + + proc finalize {_ chan} { + namespace delete $_ + } + + + proc read {_ chan count} { + namespace upvar $_ source source + set res [string range $source 0 $count-1] + set source [string range $source $count end] + return $res + } + + + proc watch {_ chan events} { + after 0 [list chan postevent $chan read] + return read + } + + namespace ensemble create -parameters _ + namespace export * +} + + + + +namespace eval inputfilter { + proc initialize {chan mode} { + return {initialize finalize read} + } + + proc read {chan buffer} { + return $buffer + } + + proc finalize chan { + namespace delete $chan + } + + namespace ensemble create + namespace export * +} + + + # Channel read transform that is just the identity - pass all through proc idxform {cmd handle args} { switch -- $cmd { @@ -2089,7 +2141,39 @@ test iortrans.tf-11.1 {origin thread of moved transform destroyed during access} thread::release $tidb } -result {Owner lost} -# ### ### ### ######### ######### ######### + +test iortrans-ea69b0258a9833cb { + Crash when using a channel transformation on TCP client socket + + "line two" does not make it into result. This issue should probably be + addressed, but it is outside the scope of this test. +} -setup { + set res {} + set read 0 +} -body { + namespace eval reflector1 { + variable source "line one\nline two" + interp alias {} [namespace current]::dispatch {} [ + namespace parent]::reflector [namespace current] + } + set chan [chan create read [namespace which reflector1::dispatch]] + chan configure $chan -blocking 0 + chan push $chan inputfilter + chan event $chan read [list ::apply [list chan { + variable res + variable read + set gets [gets $chan] + append res $gets + incr read + } [namespace current]] $chan] + vwait [namespace current]::read + chan pop $chan + vwait [namespace current]::read + return $res +} -cleanup { + catch {unset read} + close $chan +} -result {line one} cleanupTests return -- cgit v0.12 From 6d7423228211f312016f0c62ce1bc86c3d3777db Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Mon, 13 Mar 2023 13:44:35 +0000 Subject: Bug [183a1adcc0]. Buffer overflow in Tcl_UtfToExternal --- generic/tclEncoding.c | 14 +++ generic/tclTest.c | 236 +++++++++++++++++++++++++++++++++++++++++++++++++- tests/encoding.test | 35 ++++++++ 3 files changed, 283 insertions(+), 2 deletions(-) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 2b3b614..92217f3 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -1233,6 +1233,9 @@ Tcl_ExternalToUtf( } if (!noTerminate) { + if (dstLen < 1) { + return TCL_CONVERT_NOSPACE; + } /* * If there are any null characters in the middle of the buffer, * they will converted to the UTF-8 null character (\xC080). To get @@ -1241,6 +1244,10 @@ Tcl_ExternalToUtf( */ dstLen--; + } else { + if (dstLen < 0) { + return TCL_CONVERT_NOSPACE; + } } do { Tcl_EncodingState savedState = *statePtr; @@ -1415,10 +1422,17 @@ Tcl_UtfToExternal( dstCharsPtr = &dstChars; } + if (dstLen < encodingPtr->nullSize) { + return TCL_CONVERT_NOSPACE; + } dstLen -= encodingPtr->nullSize; result = encodingPtr->fromUtfProc(encodingPtr->clientData, src, srcLen, flags, statePtr, dst, dstLen, srcReadPtr, dstWrotePtr, dstCharsPtr); + /* + * Buffer is terminated irrespective of result. Not sure this is + * reasonable but keep for historical/compatibility reasons. + */ if (encodingPtr->nullSize == 2) { dst[*dstWrotePtr + 1] = '\0'; } diff --git a/generic/tclTest.c b/generic/tclTest.c index bc51c99..c2b7144 100644 --- a/generic/tclTest.c +++ b/generic/tclTest.c @@ -1817,6 +1817,234 @@ static void SpecialFree(blockPtr) } /* + *------------------------------------------------------------------------ + * + * UtfTransformFn -- + * + * Implements a direct call into Tcl_UtfToExternal and Tcl_ExternalToUtf + * as otherwise there is no script level command that directly exercises + * these functions (i/o command cannot test all combinations) + * The arguments at the script level are roughly those of the above + * functions: + * encodingname srcbytes flags state dstlen ?srcreadvar? ?dstwrotevar? ?dstcharsvar? + * + * Results: + * TCL_OK or TCL_ERROR. This any errors running the test, NOT the + * result of Tcl_UtfToExternal or Tcl_ExternalToUtf. + * + * Side effects: + * + * The result in the interpreter is a list of the return code from the + * Tcl_UtfToExternal/Tcl_ExternalToUtf functions, the encoding state, and + * an encoded binary string of length dstLen. Note the string is the + * entire output buffer, not just the part containing the decoded + * portion. This allows for additional checks at test script level. + * + * If any of the srcreadvar, dstwrotevar and + * dstcharsvar are specified and not empty, they are treated as names + * of variables where the *srcRead, *dstWrote and *dstChars output + * from the functions are stored. + * + * The function also checks internally whether nuls are correctly + * appended as requested but the TCL_ENCODING_NO_TERMINATE flag + * and that no buffer overflows occur. + *------------------------------------------------------------------------ + */ +typedef int +UtfTransformFn(Tcl_Interp *interp, Tcl_Encoding encoding, const char *src, int srcLen, int flags, Tcl_EncodingState *statePtr, + char *dst, int dstLen, int *srcReadPtr, int *dstWrotePtr, int *dstCharsPtr); +static int UtfExtWrapper( + Tcl_Interp *interp, UtfTransformFn *transformer, int objc, Tcl_Obj *const objv[]) +{ + Tcl_Encoding encoding; + Tcl_EncodingState encState, *encStatePtr; + int srcLen, bufLen; + const char *bytes; + char *bufPtr; + int srcRead, dstLen, dstWrote, dstChars; + Tcl_Obj *srcReadVar, *dstWroteVar, *dstCharsVar; + int result; + int flags; + Tcl_Obj **flagObjs; + int nflags; + + if (objc < 7 || objc > 10) { + Tcl_WrongNumArgs(interp, + 2, + objv, + "encoding srcbytes flags state dstlen ?srcreadvar? ?dstwrotevar? ?dstcharsvar?"); + return TCL_ERROR; + } + if (Tcl_GetEncodingFromObj(interp, objv[2], &encoding) != TCL_OK) { + return TCL_ERROR; + } + + /* Flags may be specified as list of integers and keywords */ + flags = 0; + if (Tcl_ListObjGetElements(interp, objv[4], &nflags, &flagObjs) != TCL_OK) { + return TCL_ERROR; + } + + struct { + const char *flagKey; + int flag; + } flagMap[] = { + {"start", TCL_ENCODING_START}, + {"end", TCL_ENCODING_END}, + {"stoponerror", TCL_ENCODING_STOPONERROR}, + {"noterminate", TCL_ENCODING_NO_TERMINATE}, + {"charlimit", TCL_ENCODING_CHAR_LIMIT}, + {NULL, 0} + }; + int i; + for (i = 0; i < nflags; ++i) { + int flag; + if (Tcl_GetIntFromObj(NULL, flagObjs[i], &flag) == TCL_OK) { + flags |= flag; + } + else { + int idx; + if (Tcl_GetIndexFromObjStruct(interp, + flagObjs[i], + flagMap, + sizeof(flagMap[0]), + "flag", + 0, + &idx) != TCL_OK) { + return TCL_ERROR; + } + flags |= flagMap[idx].flag; + } + } + + /* Assumes state is integer if not "" */ + Tcl_WideInt wide; + if (Tcl_GetWideIntFromObj(interp, objv[5], &wide) == TCL_OK) { + encState = (Tcl_EncodingState) wide; + encStatePtr = &encState; + } else if (Tcl_GetCharLength(objv[5]) == 0) { + encStatePtr = NULL; + } else { + return TCL_ERROR; + } + if (Tcl_GetIntFromObj(interp, objv[6], &dstLen) != TCL_OK) { + return TCL_ERROR; + } + srcReadVar = NULL; + dstWroteVar = NULL; + dstCharsVar = NULL; + if (objc > 7) { + /* Has caller requested srcRead? */ + if (Tcl_GetCharLength(objv[7])) { + srcReadVar = objv[7]; + } + if (objc > 8) { + /* Ditto for dstWrote */ + if (Tcl_GetCharLength(objv[8])) { + dstWroteVar = objv[8]; + } + if (objc > 9) { + if (Tcl_GetCharLength(objv[9])) { + dstCharsVar = objv[9]; + } + } + } + } + if (flags & TCL_ENCODING_CHAR_LIMIT) { + /* Caller should have specified the dest char limit */ + Tcl_Obj *valueObj; + if (dstCharsVar == NULL || + (valueObj = Tcl_ObjGetVar2(interp, dstCharsVar, NULL, 0)) == NULL + ) { + Tcl_SetResult(interp, + "dstCharsVar must be specified with integer value if " + "TCL_ENCODING_CHAR_LIMIT set in flags.", TCL_STATIC); + return TCL_ERROR; + } + if (Tcl_GetIntFromObj(interp, valueObj, &dstChars) != TCL_OK) { + return TCL_ERROR; + } + } else { + dstChars = 0; /* Only used for output */ + } + + bufLen = dstLen + 4; /* 4 -> overflow detection */ + bufPtr = ckalloc(bufLen); + memset(bufPtr, 0xFF, dstLen); /* Need to check nul terminator */ + memmove(bufPtr + dstLen, "\xAB\xCD\xEF\xAB", 4); /* overflow detection */ + bytes = (char *) Tcl_GetByteArrayFromObj(objv[3], &srcLen); /* Last! to avoid shimmering */ + result = (*transformer)(interp, encoding, bytes, srcLen, flags, + encStatePtr, bufPtr, dstLen, + srcReadVar ? &srcRead : NULL, + &dstWrote, + dstCharsVar ? &dstChars : NULL); + if (memcmp(bufPtr + bufLen - 4, "\xAB\xCD\xEF\xAB", 4)) { + Tcl_SetResult(interp, + "Tcl_ExternalToUtf wrote past output buffer", + TCL_STATIC); + result = TCL_ERROR; + } else if (result != TCL_ERROR) { + Tcl_Obj *resultObjs[3]; + switch (result) { + case TCL_OK: + resultObjs[0] = Tcl_NewStringObj("ok", -1); + break; + case TCL_CONVERT_MULTIBYTE: + resultObjs[0] = Tcl_NewStringObj("multibyte", -1); + break; + case TCL_CONVERT_SYNTAX: + resultObjs[0] = Tcl_NewStringObj("syntax", -1); + break; + case TCL_CONVERT_UNKNOWN: + resultObjs[0] = Tcl_NewStringObj("unknown", -1); + break; + case TCL_CONVERT_NOSPACE: + resultObjs[0] = Tcl_NewStringObj("nospace", -1); + break; + default: + resultObjs[0] = Tcl_NewIntObj(result); + break; + } + result = TCL_OK; + resultObjs[1] = + encStatePtr ? Tcl_NewWideIntObj((Tcl_WideInt)encState) : Tcl_NewObj(); + resultObjs[2] = Tcl_NewByteArrayObj((unsigned char *)bufPtr, dstLen); + if (srcReadVar) { + if (Tcl_ObjSetVar2(interp, + srcReadVar, + NULL, + Tcl_NewIntObj(srcRead), + TCL_LEAVE_ERR_MSG) == NULL) { + result = TCL_ERROR; + } + } + if (dstWroteVar) { + if (Tcl_ObjSetVar2(interp, + dstWroteVar, + NULL, + Tcl_NewIntObj(dstWrote), + TCL_LEAVE_ERR_MSG) == NULL) { + result = TCL_ERROR; + } + } + if (dstCharsVar) { + if (Tcl_ObjSetVar2(interp, + dstCharsVar, + NULL, + Tcl_NewIntObj(dstChars), + TCL_LEAVE_ERR_MSG) == NULL) { + result = TCL_ERROR; + } + } + Tcl_SetObjResult(interp, Tcl_NewListObj(3, resultObjs)); + } + + ckfree(bufPtr); + Tcl_FreeEncoding(encoding); /* Free returned reference */ + return result; +} + +/* *---------------------------------------------------------------------- * * TestencodingCmd -- @@ -1845,10 +2073,10 @@ TestencodingObjCmd( const char *string; TclEncoding *encodingPtr; static const char *const optionStrings[] = { - "create", "delete", NULL + "create", "delete", "Tcl_ExternalToUtf", "Tcl_UtfToExternal", NULL }; enum options { - ENC_CREATE, ENC_DELETE + ENC_CREATE, ENC_DELETE, ENC_EXTTOUTF, ENC_UTFTOEXT }; if (Tcl_GetIndexFromObj(interp, objv[1], optionStrings, "option", 0, @@ -1894,6 +2122,10 @@ TestencodingObjCmd( Tcl_FreeEncoding(encoding); Tcl_FreeEncoding(encoding); break; + case ENC_EXTTOUTF: + return UtfExtWrapper(interp,Tcl_ExternalToUtf,objc,objv); + case ENC_UTFTOEXT: + return UtfExtWrapper(interp,Tcl_UtfToExternal,objc,objv); } return TCL_OK; } diff --git a/tests/encoding.test b/tests/encoding.test index f6f9abc..26efb19 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -739,6 +739,41 @@ test encoding-28.0 {all encodings load} -body { runtests +test encoding-bug-183a1adcc0-1 {Bug [183a1adcc0] Buffer overflow Tcl_UtfToExternal} -constraints { + testencoding +} -body { + # Note - buffers are initialized to \xff + list [catch {testencoding Tcl_UtfToExternal unicode A {start end} {} 1} result] $result +} -result [list 0 [list nospace {} \xff]] + +test encoding-bug-183a1adcc0-2 {Bug [183a1adcc0] Buffer overflow Tcl_UtfToExternal} -constraints { + testencoding +} -body { + # Note - buffers are initialized to \xff + list [catch {testencoding Tcl_UtfToExternal unicode A {start end} {} 0} result] $result +} -result [list 0 [list nospace {} {}]] + +test encoding-bug-183a1adcc0-3 {Bug [183a1adcc0] Buffer overflow Tcl_UtfToExternal} -constraints { + testencoding +} -body { + # Note - buffers are initialized to \xff + list [catch {testencoding Tcl_UtfToExternal unicode A {start end} {} 2} result] $result +} -result [list 0 [list nospace {} \x00\x00]] + +test encoding-bug-183a1adcc0-4 {Bug [183a1adcc0] Buffer overflow Tcl_UtfToExternal} -constraints { + testencoding +} -body { + # Note - buffers are initialized to \xff + list [catch {testencoding Tcl_UtfToExternal unicode A {start end} {} 3} result] $result +} -result [list 0 [list nospace {} \x00\x00\xff]] + +test encoding-bug-183a1adcc0-5 {Bug [183a1adcc0] Buffer overflow Tcl_UtfToExternal} -constraints { + testencoding +} -body { + # Note - buffers are initialized to \xff + list [catch {testencoding Tcl_UtfToExternal unicode A {start end} {} 4} result] $result +} -result [list 0 [list ok {} [expr {$::tcl_platform(byteOrder) eq "littleEndian" ? "\x41\x00" : "\x00\x41"}]\x00\x00]] + } # cleanup -- cgit v0.12 From 95158a2d57b3724c868c22025657b56c2812f4d5 Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Mon, 13 Mar 2023 16:32:55 +0000 Subject: Fix passing of encoding state in testencoding Tcl_UtfToExternal --- generic/tclTest.c | 30 ++++++++++++++++-------------- tests/utfext.test | 5 +++++ 2 files changed, 21 insertions(+), 14 deletions(-) diff --git a/generic/tclTest.c b/generic/tclTest.c index eab3eab..6860e53 100644 --- a/generic/tclTest.c +++ b/generic/tclTest.c @@ -2031,19 +2031,19 @@ static void SpecialFree( * TCL_OK or TCL_ERROR. This any errors running the test, NOT the * result of Tcl_UtfToExternal or Tcl_ExternalToUtf. * - * Side effects: + * Side effects: * * The result in the interpreter is a list of the return code from the * Tcl_UtfToExternal/Tcl_ExternalToUtf functions, the encoding state, and * an encoded binary string of length dstLen. Note the string is the * entire output buffer, not just the part containing the decoded * portion. This allows for additional checks at test script level. - * - * If any of the srcreadvar, dstwrotevar and + * + * If any of the srcreadvar, dstwrotevar and * dstcharsvar are specified and not empty, they are treated as names * of variables where the *srcRead, *dstWrote and *dstChars output * from the functions are stored. - * + * * The function also checks internally whether nuls are correctly * appended as requested but the TCL_ENCODING_NO_TERMINATE flag * and that no buffer overflows occur. @@ -2056,8 +2056,7 @@ static int UtfExtWrapper( Tcl_Interp *interp, UtfTransformFn *transformer, int objc, Tcl_Obj *const objv[]) { Tcl_Encoding encoding; - int encStateValue; /* Assumes Tcl_EncodingState points to integer!!! */ - Tcl_EncodingState encState; + Tcl_EncodingState encState, *encStatePtr; Tcl_Size srcLen, bufLen; const unsigned char *bytes; unsigned char *bufPtr; @@ -2121,13 +2120,16 @@ static int UtfExtWrapper( } /* Assumes state is integer if not "" */ - if (Tcl_GetIntFromObj(interp, objv[5], &encStateValue) == TCL_OK) { - encState = (Tcl_EncodingState)&encStateValue; + Tcl_WideInt wide; + if (Tcl_GetWideIntFromObj(interp, objv[5], &wide) == TCL_OK) { + encState = (Tcl_EncodingState) wide; + encStatePtr = &encState; } else if (Tcl_GetCharLength(objv[5]) == 0) { - encState = NULL; + encStatePtr = NULL; } else { return TCL_ERROR; } + if (Tcl_GetIntFromObj(interp, objv[6], &dstLen) != TCL_OK) { return TCL_ERROR; } @@ -2162,7 +2164,7 @@ static int UtfExtWrapper( "TCL_ENCODING_CHAR_LIMIT set in flags.", TCL_STATIC); return TCL_ERROR; } - if (Tcl_GetIntFromObj(interp, dstCharsVar, &dstChars) != TCL_OK) { + if (Tcl_GetIntFromObj(interp, valueObj, &dstChars) != TCL_OK) { return TCL_ERROR; } } else { @@ -2170,12 +2172,12 @@ static int UtfExtWrapper( } bufLen = dstLen + 4; /* 4 -> overflow detection */ - bufPtr = ckalloc(bufLen); + bufPtr = (unsigned char *) ckalloc(bufLen); memset(bufPtr, 0xFF, dstLen); /* Need to check nul terminator */ memmove(bufPtr + dstLen, "\xAB\xCD\xEF\xAB", 4); /* overflow detection */ bytes = Tcl_GetByteArrayFromObj(objv[3], &srcLen); /* Last! to avoid shimmering */ - result = (*transformer)(interp, encoding, bytes, srcLen, flags, - &encState, bufPtr, dstLen, + result = (*transformer)(interp, encoding, (const char *)bytes, srcLen, flags, + encStatePtr, (char *) bufPtr, dstLen, srcReadVar ? &srcRead : NULL, &dstWrote, dstCharsVar ? &dstChars : NULL); @@ -2210,7 +2212,7 @@ static int UtfExtWrapper( } result = TCL_OK; resultObjs[1] = - encState ? Tcl_NewIntObj(encStateValue) : Tcl_NewObj(); + encStatePtr ? Tcl_NewWideIntObj((Tcl_WideInt)encState) : Tcl_NewObj(); resultObjs[2] = Tcl_NewByteArrayObj(bufPtr, dstLen); if (srcReadVar) { if (Tcl_ObjSetVar2(interp, diff --git a/tests/utfext.test b/tests/utfext.test index 61e36b8..6cf3dd7 100644 --- a/tests/utfext.test +++ b/tests/utfext.test @@ -88,6 +88,11 @@ test xx-bufferoverflow {buffer overflow Tcl_ExternalToUtf} -body { testencoding Tcl_UtfToExternal unicode A {start end} {} 1 } -result {nospace {} {}} +# Another bug - char limit not obeyed +# % set cv 2 +# % testencoding Tcl_ExternalToUtf utf-8 abcdefgh {start end noterminate charlimit} {} 20 rv wv cv +# nospace {} abcÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ + ::tcltest::cleanupTests return -- cgit v0.12 From 18a99f2522b77516b62a0d44dca1c90b3479bda1 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Tue, 14 Mar 2023 10:05:24 +0000 Subject: Add "ucs-2" constraint to encoding-bug-183a1adcc0-5 testcase, otherwise it fails with TCL_UTF_MAX>3. Broken by [47857515422b8519|this] commit --- tests/encoding.test | 2 +- tests/ioTrans.test | 2 +- win/tclWinTest.c | 10 +++++----- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/encoding.test b/tests/encoding.test index 26efb19..bac80c9 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -768,7 +768,7 @@ test encoding-bug-183a1adcc0-4 {Bug [183a1adcc0] Buffer overflow Tcl_UtfToExtern } -result [list 0 [list nospace {} \x00\x00\xff]] test encoding-bug-183a1adcc0-5 {Bug [183a1adcc0] Buffer overflow Tcl_UtfToExternal} -constraints { - testencoding + testencoding ucs-2 } -body { # Note - buffers are initialized to \xff list [catch {testencoding Tcl_UtfToExternal unicode A {start end} {} 4} result] $result diff --git a/tests/ioTrans.test b/tests/ioTrans.test index 130ff80..3a23e61 100644 --- a/tests/ioTrans.test +++ b/tests/ioTrans.test @@ -671,7 +671,7 @@ namespace eval inputfilter { proc initialize {chan mode} { return {initialize finalize read} } - + proc read {chan buffer} { return $buffer } diff --git a/win/tclWinTest.c b/win/tclWinTest.c index d70d217..6ca49f6 100644 --- a/win/tclWinTest.c +++ b/win/tclWinTest.c @@ -419,7 +419,7 @@ TestplatformChmod( const char *nativePath, int pmode) { - /* + /* * Note FILE_DELETE_CHILD missing from dirWriteMask because we do * not want overriding of child's delete setting when testing */ @@ -427,7 +427,7 @@ TestplatformChmod( FILE_WRITE_ATTRIBUTES | FILE_WRITE_EA | FILE_ADD_FILE | FILE_ADD_SUBDIRECTORY | STANDARD_RIGHTS_WRITE | DELETE | SYNCHRONIZE; - static const DWORD dirReadMask = + static const DWORD dirReadMask = FILE_READ_ATTRIBUTES | FILE_READ_EA | FILE_LIST_DIRECTORY | STANDARD_RIGHTS_READ | SYNCHRONIZE; /* Note - default user privileges allow ignoring TRAVERSE setting */ @@ -437,7 +437,7 @@ TestplatformChmod( static const DWORD fileWriteMask = FILE_WRITE_ATTRIBUTES | FILE_WRITE_EA | FILE_WRITE_DATA | FILE_APPEND_DATA | STANDARD_RIGHTS_WRITE | DELETE | SYNCHRONIZE; - static const DWORD fileReadMask = + static const DWORD fileReadMask = FILE_READ_ATTRIBUTES | FILE_READ_EA | FILE_READ_DATA | STANDARD_RIGHTS_READ | SYNCHRONIZE; static const DWORD fileExecuteMask = @@ -471,7 +471,7 @@ TestplatformChmod( if (!OpenProcessToken(GetCurrentProcess(), TOKEN_QUERY, &hToken)) { goto done; } - + /* Get process SID */ if (!GetTokenInformation(hToken, TokenUser, NULL, 0, &dw) && GetLastError() != ERROR_INSUFFICIENT_BUFFER) { @@ -489,7 +489,7 @@ TestplatformChmod( ckfree(aceEntry[nSids].pSid); /* Since we have not ++'ed nSids */ goto done; } - /* + /* * Always include DACL modify rights so we don't get locked out */ aceEntry[nSids].mask = READ_CONTROL | WRITE_DAC | WRITE_OWNER | SYNCHRONIZE | -- cgit v0.12 From 3aef0c58f9614b4dd1b9eb4201238789cd9022fa Mon Sep 17 00:00:00 2001 From: pooryorick Date: Tue, 14 Mar 2023 20:27:04 +0000 Subject: Further fix for issue [ea69b0258a9833cb], crash when using a channel transformation on TCP client socket. --- generic/tclIO.c | 30 +++++++++++++++++++++++------- generic/tclIO.h | 3 +++ 2 files changed, 26 insertions(+), 7 deletions(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index 715f8c7..55b6bdc 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -1699,6 +1699,7 @@ Tcl_CreateChannel( statePtr->scriptRecordPtr = NULL; statePtr->bufSize = CHANNELBUFFER_DEFAULT_SIZE; statePtr->timer = NULL; + statePtr->timerChanPtr = NULL; statePtr->csPtrR = NULL; statePtr->csPtrW = NULL; statePtr->outputStage = NULL; @@ -3093,7 +3094,13 @@ CloseChannel( * Cancel any outstanding timer. */ - Tcl_DeleteTimerHandler(statePtr->timer); + if (statePtr->timer != NULL) { + Tcl_DeleteTimerHandler(statePtr->timer); + statePtr->timer = NULL; + TclChannelRelease((Tcl_Channel)statePtr->timerChanPtr); + statePtr->timerChanPtr = NULL; + } + /* * Mark the channel as deleted by clearing the type structure. @@ -3912,7 +3919,12 @@ Tcl_ClearChannelHandlers( * Cancel any outstanding timer. */ - Tcl_DeleteTimerHandler(statePtr->timer); + if (statePtr->timer != NULL) { + Tcl_DeleteTimerHandler(statePtr->timer); + statePtr->timer = NULL; + TclChannelRelease((Tcl_Channel)statePtr->timerChanPtr); + statePtr->timerChanPtr = NULL; + } /* * Remove any references to channel handlers for this channel that may be @@ -8552,8 +8564,9 @@ UpdateInterest( if (!statePtr->timer) { TclChannelPreserve((Tcl_Channel)chanPtr); + statePtr->timerChanPtr = chanPtr; statePtr->timer = Tcl_CreateTimerHandler(SYNTHETIC_EVENT_TIME, - ChannelTimerProc, chanPtr); + ChannelTimerProc, chanPtr); } } } @@ -8582,11 +8595,13 @@ ChannelTimerProc( ClientData clientData) { Channel *chanPtr = (Channel *)clientData; + /* State info for channel */ ChannelState *statePtr = chanPtr->state; - /* State info for channel */ if (chanPtr->typePtr == NULL) { - TclChannelRelease((Tcl_Channel)chanPtr); + statePtr->timer = NULL; + TclChannelRelease((Tcl_Channel)statePtr->timerChanPtr); + statePtr->timerChanPtr = NULL; } else { if (!GotFlag(statePtr, CHANNEL_NEED_MORE_DATA) && (statePtr->interestMask & TCL_READABLE) @@ -8598,14 +8613,15 @@ ChannelTimerProc( */ statePtr->timer = Tcl_CreateTimerHandler(SYNTHETIC_EVENT_TIME, - ChannelTimerProc,chanPtr); + ChannelTimerProc,chanPtr); Tcl_Preserve(statePtr); Tcl_NotifyChannel((Tcl_Channel) chanPtr, TCL_READABLE); Tcl_Release(statePtr); } else { statePtr->timer = NULL; UpdateInterest(chanPtr); - TclChannelRelease((Tcl_Channel)chanPtr); + TclChannelRelease((Tcl_Channel)statePtr->timerChanPtr); + statePtr->timerChanPtr = NULL; } } } diff --git a/generic/tclIO.h b/generic/tclIO.h index eccc7a9..03bbce8 100644 --- a/generic/tclIO.h +++ b/generic/tclIO.h @@ -188,6 +188,9 @@ typedef struct ChannelState { * handlers ("fileevent") on this channel. */ int bufSize; /* What size buffers to allocate? */ Tcl_TimerToken timer; /* Handle to wakeup timer for this channel. */ + Channel *timerChanPtr; /* Needed in order to decrement the refCount of + the right channel when the timer is + deleted. */ struct CopyState *csPtrR; /* State of background copy for which channel * is input, or NULL. */ struct CopyState *csPtrW; /* State of background copy for which channel -- cgit v0.12 From 6ded4b92be27dd73c424f6d524d1a0578621c126 Mon Sep 17 00:00:00 2001 From: pooryorick Date: Wed, 15 Mar 2023 08:42:11 +0000 Subject: Further fix for issue [ea69b0258a9833cb], crash when using a channel transformation on TCP client socket. --- generic/tclIO.c | 52 ++++++++++++++++++++++++++++++++++++---------------- generic/tclIO.h | 3 +++ 2 files changed, 39 insertions(+), 16 deletions(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index 58137a5..08c52a7 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -165,6 +165,7 @@ static int CheckForDeadChannel(Tcl_Interp *interp, static void CheckForStdChannelsBeingClosed(Tcl_Channel chan); static void CleanupChannelHandlers(Tcl_Interp *interp, Channel *chanPtr); +static void CleanupTimerHandler(ChannelState *statePtr); static int CloseChannel(Tcl_Interp *interp, Channel *chanPtr, int errorCode); static int CloseChannelPart(Tcl_Interp *interp, Channel *chanPtr, @@ -172,6 +173,7 @@ static int CloseChannelPart(Tcl_Interp *interp, Channel *chanPtr, static int CloseWrite(Tcl_Interp *interp, Channel *chanPtr); static void CommonGetsCleanup(Channel *chanPtr); static int CopyData(CopyState *csPtr, int mask); +static void DeleteTimerHandler(ChannelState *statePtr); static int MoveBytes(CopyState *csPtr); static void MBCallback(CopyState *csPtr, Tcl_Obj *errObj); @@ -1730,6 +1732,7 @@ Tcl_CreateChannel( statePtr->scriptRecordPtr = NULL; statePtr->bufSize = CHANNELBUFFER_DEFAULT_SIZE; statePtr->timer = NULL; + statePtr->timerChanPtr = NULL; statePtr->csPtrR = NULL; statePtr->csPtrW = NULL; statePtr->outputStage = NULL; @@ -3187,8 +3190,8 @@ CloseChannel( /* * Cancel any outstanding timer. */ + DeleteTimerHandler(statePtr); - Tcl_DeleteTimerHandler(statePtr->timer); /* * Mark the channel as deleted by clearing the type structure. @@ -3540,7 +3543,7 @@ Tcl_Close( /* * Cancel any outstanding timer. */ - Tcl_DeleteTimerHandler(statePtr->timer); + DeleteTimerHandler(statePtr); /* * Invoke the registered close callbacks and delete their records. @@ -4015,8 +4018,7 @@ Tcl_ClearChannelHandlers( /* * Cancel any outstanding timer. */ - - Tcl_DeleteTimerHandler(statePtr->timer); + DeleteTimerHandler(statePtr); /* * Remove any references to channel handlers for this channel that may be @@ -8805,8 +8807,9 @@ UpdateInterest( if (!statePtr->timer) { TclChannelPreserve((Tcl_Channel)chanPtr); + statePtr->timerChanPtr = chanPtr; statePtr->timer = Tcl_CreateTimerHandler(SYNTHETIC_EVENT_TIME, - ChannelTimerProc, chanPtr); + ChannelTimerProc, chanPtr); } } } @@ -8816,6 +8819,7 @@ UpdateInterest( && GotFlag(statePtr, CHANNEL_NONBLOCKING)) { TclChannelPreserve((Tcl_Channel)chanPtr); + statePtr->timerChanPtr = chanPtr; statePtr->timer = Tcl_CreateTimerHandler(SYNTHETIC_EVENT_TIME, ChannelTimerProc,chanPtr); } @@ -8846,7 +8850,6 @@ ChannelTimerProc( void *clientData) { Channel *chanPtr = (Channel *)clientData; - /* State info for channel */ ChannelState *statePtr = chanPtr->state; @@ -8857,7 +8860,7 @@ ChannelTimerProc( */ if (chanPtr->typePtr == NULL) { - TclChannelRelease((Tcl_Channel)chanPtr); + CleanupTimerHandler(statePtr); } else { Tcl_Preserve(statePtr); statePtr->timer = NULL; @@ -8870,35 +8873,52 @@ ChannelTimerProc( * before UpdateInterest gets called by Tcl_NotifyChannel. */ statePtr->timer = Tcl_CreateTimerHandler(SYNTHETIC_EVENT_TIME, - ChannelTimerProc,chanPtr); + ChannelTimerProc,chanPtr); Tcl_NotifyChannel((Tcl_Channel) chanPtr, TCL_WRITABLE); } else { /* The channel may have just been closed from within Tcl_NotifyChannel */ if (!GotFlag(statePtr, CHANNEL_INCLOSE)) { if (!GotFlag(statePtr, CHANNEL_NEED_MORE_DATA) - && (statePtr->interestMask & TCL_READABLE) - && (statePtr->inQueueHead != NULL) - && IsBufferReady(statePtr->inQueueHead)) { + && (statePtr->interestMask & TCL_READABLE) + && (statePtr->inQueueHead != NULL) + && IsBufferReady(statePtr->inQueueHead)) { /* * Restart the timer in case a channel handler reenters the event loop * before UpdateInterest gets called by Tcl_NotifyChannel. */ statePtr->timer = Tcl_CreateTimerHandler(SYNTHETIC_EVENT_TIME, - ChannelTimerProc,chanPtr); + ChannelTimerProc,chanPtr); Tcl_NotifyChannel((Tcl_Channel) chanPtr, TCL_READABLE); } else { - TclChannelRelease((Tcl_Channel)chanPtr); + CleanupTimerHandler(statePtr); UpdateInterest(chanPtr); } } else { - TclChannelRelease((Tcl_Channel)chanPtr); + CleanupTimerHandler(statePtr); } } - Tcl_Release(statePtr); } - +} + +static void +DeleteTimerHandler( + ChannelState *statePtr +) +{ + if (statePtr->timer != NULL) { + Tcl_DeleteTimerHandler(statePtr->timer); + CleanupTimerHandler(statePtr); + } +} +static void +CleanupTimerHandler( + ChannelState *statePtr +){ + TclChannelRelease((Tcl_Channel)statePtr->timerChanPtr); + statePtr->timer = NULL; + statePtr->timerChanPtr = NULL; } /* diff --git a/generic/tclIO.h b/generic/tclIO.h index 689067f..bfaf416 100644 --- a/generic/tclIO.h +++ b/generic/tclIO.h @@ -188,6 +188,9 @@ typedef struct ChannelState { * handlers ("fileevent") on this channel. */ Tcl_Size bufSize; /* What size buffers to allocate? */ Tcl_TimerToken timer; /* Handle to wakeup timer for this channel. */ + Channel *timerChanPtr; /* Needed in order to decrement the refCount of + the right channel when the timer is + deleted. */ struct CopyState *csPtrR; /* State of background copy for which channel * is input, or NULL. */ struct CopyState *csPtrW; /* State of background copy for which channel -- cgit v0.12 From ea9a0277a2229f37c66c9a1e6ed1fb3e3e0a2f11 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Wed, 15 Mar 2023 09:11:22 +0000 Subject: ckfree -> Tcl_Free --- generic/tclTest.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/generic/tclTest.c b/generic/tclTest.c index fcb9ff4..f7854ac 100644 --- a/generic/tclTest.c +++ b/generic/tclTest.c @@ -2205,7 +2205,7 @@ static int UtfExtWrapper( Tcl_SetObjResult(interp, Tcl_NewListObj(3, resultObjs)); } - ckfree(bufPtr); + Tcl_Free(bufPtr); Tcl_FreeEncoding(encoding); /* Free returned reference */ return result; } -- cgit v0.12 From 6bd8763c2db23d253082f9e4d79e53e60a77e856 Mon Sep 17 00:00:00 2001 From: dgp Date: Wed, 15 Mar 2023 12:53:03 +0000 Subject: Misspelled constraint created testing noise. --- tests/encoding.test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/encoding.test b/tests/encoding.test index bac80c9..dc50f24 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -768,7 +768,7 @@ test encoding-bug-183a1adcc0-4 {Bug [183a1adcc0] Buffer overflow Tcl_UtfToExtern } -result [list 0 [list nospace {} \x00\x00\xff]] test encoding-bug-183a1adcc0-5 {Bug [183a1adcc0] Buffer overflow Tcl_UtfToExternal} -constraints { - testencoding ucs-2 + testencoding ucs2 } -body { # Note - buffers are initialized to \xff list [catch {testencoding Tcl_UtfToExternal unicode A {start end} {} 4} result] $result -- cgit v0.12 From e0e09638fece9ca63daad3b3675dc7bfb1ede7d3 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Wed, 15 Mar 2023 16:21:53 +0000 Subject: Remove _LARGEFILE_SOURCE64 usage. See [d690400d07] --- unix/configure | 105 ---------------------------------------------------- unix/tcl.m4 | 3 -- unix/tclConfig.h.in | 3 -- 3 files changed, 111 deletions(-) diff --git a/unix/configure b/unix/configure index 94ecfc6..2ebb2ea 100755 --- a/unix/configure +++ b/unix/configure @@ -9318,111 +9318,6 @@ _ACEOF tcl_flags="$tcl_flags _LARGEFILE64_SOURCE" fi - - if test "${tcl_cv_flag__largefile_source64+set}" = set; then - echo $ECHO_N "(cached) $ECHO_C" >&6 -else - cat >conftest.$ac_ext <<_ACEOF -/* confdefs.h. */ -_ACEOF -cat confdefs.h >>conftest.$ac_ext -cat >>conftest.$ac_ext <<_ACEOF -/* end confdefs.h. */ -#include -int -main () -{ -char *p = (char *)open64; - ; - return 0; -} -_ACEOF -rm -f conftest.$ac_objext -if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 - (eval $ac_compile) 2>conftest.er1 - ac_status=$? - grep -v '^ *+' conftest.er1 >conftest.err - rm -f conftest.er1 - cat conftest.err >&5 - echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && - { ac_try='test -z "$ac_c_werror_flag" - || test ! -s conftest.err' - { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 - (eval $ac_try) 2>&5 - ac_status=$? - echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); }; } && - { ac_try='test -s conftest.$ac_objext' - { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 - (eval $ac_try) 2>&5 - ac_status=$? - echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); }; }; then - tcl_cv_flag__largefile_source64=no -else - echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - -cat >conftest.$ac_ext <<_ACEOF -/* confdefs.h. */ -_ACEOF -cat confdefs.h >>conftest.$ac_ext -cat >>conftest.$ac_ext <<_ACEOF -/* end confdefs.h. */ -#define _LARGEFILE_SOURCE64 1 -#include -int -main () -{ -char *p = (char *)open64; - ; - return 0; -} -_ACEOF -rm -f conftest.$ac_objext -if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5 - (eval $ac_compile) 2>conftest.er1 - ac_status=$? - grep -v '^ *+' conftest.er1 >conftest.err - rm -f conftest.er1 - cat conftest.err >&5 - echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); } && - { ac_try='test -z "$ac_c_werror_flag" - || test ! -s conftest.err' - { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 - (eval $ac_try) 2>&5 - ac_status=$? - echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); }; } && - { ac_try='test -s conftest.$ac_objext' - { (eval echo "$as_me:$LINENO: \"$ac_try\"") >&5 - (eval $ac_try) 2>&5 - ac_status=$? - echo "$as_me:$LINENO: \$? = $ac_status" >&5 - (exit $ac_status); }; }; then - tcl_cv_flag__largefile_source64=yes -else - echo "$as_me: failed program was:" >&5 -sed 's/^/| /' conftest.$ac_ext >&5 - -tcl_cv_flag__largefile_source64=no -fi -rm -f conftest.err conftest.$ac_objext conftest.$ac_ext -fi -rm -f conftest.err conftest.$ac_objext conftest.$ac_ext -fi - - if test "x${tcl_cv_flag__largefile_source64}" = "xyes" ; then - -cat >>confdefs.h <<\_ACEOF -#define _LARGEFILE_SOURCE64 1 -_ACEOF - - tcl_flags="$tcl_flags _LARGEFILE_SOURCE64" - fi - if test "x${tcl_flags}" = "x" ; then echo "$as_me:$LINENO: result: none" >&5 echo "${ECHO_T}none" >&6 diff --git a/unix/tcl.m4 b/unix/tcl.m4 index 6cee92c..d9d0a71 100644 --- a/unix/tcl.m4 +++ b/unix/tcl.m4 @@ -2472,7 +2472,6 @@ AC_DEFUN([SC_TCL_LINK_LIBS], [ # Might define the following vars: # _ISOC99_SOURCE # _LARGEFILE64_SOURCE -# _LARGEFILE_SOURCE64 # #-------------------------------------------------------------------- @@ -2496,8 +2495,6 @@ AC_DEFUN([SC_TCL_EARLY_FLAGS],[ [char *p = (char *)strtoll; char *q = (char *)strtoull;]) SC_TCL_EARLY_FLAG(_LARGEFILE64_SOURCE,[#include ], [struct stat64 buf; int i = stat64("/", &buf);]) - SC_TCL_EARLY_FLAG(_LARGEFILE_SOURCE64,[#include ], - [char *p = (char *)open64;]) if test "x${tcl_flags}" = "x" ; then AC_MSG_RESULT([none]) else diff --git a/unix/tclConfig.h.in b/unix/tclConfig.h.in index 0b7ed35..6d559d1 100644 --- a/unix/tclConfig.h.in +++ b/unix/tclConfig.h.in @@ -463,9 +463,6 @@ /* Add the _LARGEFILE64_SOURCE flag when building */ #undef _LARGEFILE64_SOURCE -/* Add the _LARGEFILE_SOURCE64 flag when building */ -#undef _LARGEFILE_SOURCE64 - /* # needed in sys/socket.h Should OS/390 do the right thing with sockets? */ #undef _OE_SOCKETS -- cgit v0.12 From 70cf69246f83c91f78fd4de65ac48fa39aa634d4 Mon Sep 17 00:00:00 2001 From: pooryorick Date: Wed, 15 Mar 2023 20:13:22 +0000 Subject: New script used in the "valgrind_each" target in Makefile.in --- tools/valgrind_check_success | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 tools/valgrind_check_success diff --git a/tools/valgrind_check_success b/tools/valgrind_check_success new file mode 100644 index 0000000..24830d5 --- /dev/null +++ b/tools/valgrind_check_success @@ -0,0 +1,30 @@ +#! /usr/bin/env tclsh + + +proc main {sourcetype source} { + switch $sourcetype { + file { + set chan [open $source] + try { + set data [read $chan] + } finally { + close $chan + } + } + string { + set data $source + } + default { + error [list {wrong # args}] + } + } + set found [regexp -inline -all {blocks are\ + (?:(?:(?:definitely|indirectly|possibly) lost)|still reachable)} $data] + if {[llength $found]} { + puts 0 + } else { + puts 1 + } + flush stdout +} +main {*}$argv -- cgit v0.12 From a3c59e320df775f0d6849e5d3163292280b3b386 Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Thu, 16 Mar 2023 03:08:12 +0000 Subject: Change -encodingprofile to -profile --- generic/tclIO.c | 58 +++++++++++++++++++++++++-------------------------- tests/chanio.test | 6 +++--- tests/encoding.test | 10 ++++----- tests/io.test | 44 +++++++++++++++++++------------------- tests/ioCmd.test | 26 +++++++++++------------ tests/winConsole.test | 14 ++++++------- tests/zlib.test | 4 ++-- 7 files changed, 81 insertions(+), 81 deletions(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index f24eaa0..dbdbda5 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -7810,7 +7810,7 @@ Tcl_BadChannelOption( { if (interp != NULL) { const char *genericopt = - "blocking buffering buffersize encoding encodingprofile eofchar translation"; + "blocking buffering buffersize encoding eofchar profile translation"; const char **argv; int argc, i; Tcl_DString ds; @@ -7951,7 +7951,7 @@ Tcl_GetChannelOption( return TCL_OK; } } - if (len == 0 || HaveOpt(8, "-encoding")) { + if (len == 0 || HaveOpt(2, "-encoding")) { if (len == 0) { Tcl_DStringAppendElement(dsPtr, "-encoding"); } @@ -7965,23 +7965,6 @@ Tcl_GetChannelOption( return TCL_OK; } } - if (len == 0 || HaveOpt(9, "-encodingprofile")) { - int profile; - const char *profileName; - if (len == 0) { - Tcl_DStringAppendElement(dsPtr, "-encodingprofile"); - } - /* Note currently input and output profiles are same */ - profile = TCL_ENCODING_PROFILE_GET(statePtr->inputEncodingFlags); - profileName = TclEncodingProfileIdToName(interp, profile); - if (profileName == NULL) { - return TCL_ERROR; - } - Tcl_DStringAppendElement(dsPtr, profileName); - if (len > 0) { - return TCL_OK; - } - } if (len == 0 || HaveOpt(2, "-eofchar")) { if (len == 0) { Tcl_DStringAppendElement(dsPtr, "-eofchar"); @@ -8025,6 +8008,23 @@ Tcl_GetChannelOption( return TCL_OK; } } + if (len == 0 || HaveOpt(1, "-profile")) { + int profile; + const char *profileName; + if (len == 0) { + Tcl_DStringAppendElement(dsPtr, "-profile"); + } + /* Note currently input and output profiles are same */ + profile = TCL_ENCODING_PROFILE_GET(statePtr->inputEncodingFlags); + profileName = TclEncodingProfileIdToName(interp, profile); + if (profileName == NULL) { + return TCL_ERROR; + } + Tcl_DStringAppendElement(dsPtr, profileName); + if (len > 0) { + return TCL_OK; + } + } if (len == 0 || HaveOpt(1, "-translation")) { if (len == 0) { Tcl_DStringAppendElement(dsPtr, "-translation"); @@ -8194,7 +8194,7 @@ Tcl_SetChannelOption( } Tcl_SetChannelBufferSize(chan, newBufferSize); return TCL_OK; - } else if (HaveOpt(8, "-encoding")) { + } else if (HaveOpt(2, "-encoding")) { Tcl_Encoding encoding; int profile; @@ -8230,15 +8230,6 @@ Tcl_SetChannelOption( ResetFlag(statePtr, CHANNEL_NEED_MORE_DATA|CHANNEL_ENCODING_ERROR); UpdateInterest(chanPtr); return TCL_OK; - } else if (HaveOpt(9, "-encodingprofile")) { - int profile; - if (TclEncodingProfileNameToId(interp, newValue, &profile) != TCL_OK) { - return TCL_ERROR; - } - TCL_ENCODING_PROFILE_SET(statePtr->inputEncodingFlags, profile); - TCL_ENCODING_PROFILE_SET(statePtr->outputEncodingFlags, profile); - ResetFlag(statePtr, CHANNEL_NEED_MORE_DATA|CHANNEL_ENCODING_ERROR); - return TCL_OK; } else if (HaveOpt(2, "-eofchar")) { if (!newValue[0] || (!(newValue[0] & 0x80) && !newValue[1])) { if (GotFlag(statePtr, TCL_READABLE)) { @@ -8294,6 +8285,15 @@ Tcl_SetChannelOption( ResetFlag(statePtr, CHANNEL_EOF|CHANNEL_STICKY_EOF|CHANNEL_BLOCKED); statePtr->inputEncodingFlags &= ~TCL_ENCODING_END; return TCL_OK; + } else if (HaveOpt(1, "-profile")) { + int profile; + if (TclEncodingProfileNameToId(interp, newValue, &profile) != TCL_OK) { + return TCL_ERROR; + } + TCL_ENCODING_PROFILE_SET(statePtr->inputEncodingFlags, profile); + TCL_ENCODING_PROFILE_SET(statePtr->outputEncodingFlags, profile); + ResetFlag(statePtr, CHANNEL_NEED_MORE_DATA|CHANNEL_ENCODING_ERROR); + return TCL_OK; } else if (HaveOpt(1, "-translation")) { const char *readMode, *writeMode; diff --git a/tests/chanio.test b/tests/chanio.test index 6da6305..d2008e6 100644 --- a/tests/chanio.test +++ b/tests/chanio.test @@ -254,7 +254,7 @@ test chan-io-3.3 {WriteChars: compatibility with WriteBytes: flush on line} -bod test chan-io-3.4 {WriteChars: loop over stage buffer} -body { # stage buffer maps to more than can be queued at once. set f [open $path(test1) w] - chan configure $f -encoding jis0208 -buffersize 16 -encodingprofile tcl8 + chan configure $f -encoding jis0208 -buffersize 16 -profile tcl8 chan puts -nonewline $f "\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\" set x [list [contents $path(test1)]] chan close $f @@ -267,7 +267,7 @@ test chan-io-3.5 {WriteChars: saved != 0} -body { # be moved to beginning of next channel buffer to preserve requested # buffersize. set f [open $path(test1) w] - chan configure $f -encoding jis0208 -buffersize 17 -encodingprofile tcl8 + chan configure $f -encoding jis0208 -buffersize 17 -profile tcl8 chan puts -nonewline $f "\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\" set x [list [contents $path(test1)]] chan close $f @@ -300,7 +300,7 @@ test chan-io-3.7 {WriteChars: (bufPtr->nextAdded > bufPtr->length)} -body { # on flush. The truncated bytes are moved to the beginning of the next # channel buffer. set f [open $path(test1) w] - chan configure $f -encoding jis0208 -buffersize 17 -encodingprofile tcl8 + chan configure $f -encoding jis0208 -buffersize 17 -profile tcl8 chan puts -nonewline $f "\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\" set x [list [contents $path(test1)]] chan close $f diff --git a/tests/encoding.test b/tests/encoding.test index 1af5a26..31f966c 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -105,13 +105,13 @@ test encoding-3.2 {Tcl_GetEncodingName, non-null} -setup { } -cleanup { fconfigure stdout -encoding $old } -result {jis0208} -test encoding-3.3 {fconfigure -encodingprofile} -setup { - set old [fconfigure stdout -encodingprofile] +test encoding-3.3 {fconfigure -profile} -setup { + set old [fconfigure stdout -profile] } -body { - fconfigure stdout -encodingprofile replace - fconfigure stdout -encodingprofile + fconfigure stdout -profile replace + fconfigure stdout -profile } -cleanup { - fconfigure stdout -encodingprofile $old + fconfigure stdout -profile $old } -result replace test encoding-4.1 {Tcl_GetEncodingNames} -constraints {testencoding} -setup { diff --git a/tests/io.test b/tests/io.test index fc126de..c3c0cdd 100644 --- a/tests/io.test +++ b/tests/io.test @@ -274,7 +274,7 @@ test io-3.4 {WriteChars: loop over stage buffer} -body { # stage buffer maps to more than can be queued at once. set f [open $path(test1) w] - fconfigure $f -encoding jis0208 -buffersize 16 -encodingprofile tcl8 + fconfigure $f -encoding jis0208 -buffersize 16 -profile tcl8 puts -nonewline $f "\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\" set x [list [contents $path(test1)]] close $f @@ -288,7 +288,7 @@ test io-3.5 {WriteChars: saved != 0} -body { # requested buffersize. set f [open $path(test1) w] - fconfigure $f -encoding jis0208 -buffersize 17 -encodingprofile tcl8 + fconfigure $f -encoding jis0208 -buffersize 17 -profile tcl8 puts -nonewline $f "\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\" set x [list [contents $path(test1)]] close $f @@ -321,7 +321,7 @@ test io-3.7 {WriteChars: (bufPtr->nextAdded > bufPtr->length)} -body { # of the next channel buffer. set f [open $path(test1) w] - fconfigure $f -encoding jis0208 -buffersize 17 -encodingprofile tcl8 + fconfigure $f -encoding jis0208 -buffersize 17 -profile tcl8 puts -nonewline $f "\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\" set x [list [contents $path(test1)]] close $f @@ -7634,7 +7634,7 @@ test io-52.20 {TclCopyChannel & encodings} -setup { set out [open $path(kyrillic.txt) w] # Using "-encoding ascii" means reading the "Á" gives an error - fconfigure $in -encoding ascii -encodingprofile strict + fconfigure $in -encoding ascii -profile strict fconfigure $out -encoding koi8-r -translation lf fcopy $in $out @@ -7656,7 +7656,7 @@ test io-52.21 {TclCopyChannel & encodings} -setup { # Using "-encoding ascii" means writing the "Á" gives an error fconfigure $in -encoding utf-8 - fconfigure $out -encoding ascii -translation lf -encodingprofile strict + fconfigure $out -encoding ascii -translation lf -profile strict fcopy $in $out } -cleanup { @@ -7676,7 +7676,7 @@ test io-52.22 {TclCopyChannel & encodings} -setup { set out [open $path(kyrillic.txt) w] # Using "-encoding ascii" means reading the "Á" gives an error - fconfigure $in -encoding ascii -encodingprofile strict + fconfigure $in -encoding ascii -profile strict fconfigure $out -encoding koi8-r -translation lf proc ::xxx args { set ::s0 $args @@ -7704,7 +7704,7 @@ test io-52.23 {TclCopyChannel & encodings} -setup { # Using "-encoding ascii" means writing the "Á" gives an error fconfigure $in -encoding utf-8 - fconfigure $out -encoding ascii -translation lf -encodingprofile strict + fconfigure $out -encoding ascii -translation lf -profile strict proc ::xxx args { set ::s0 $args } @@ -9073,7 +9073,7 @@ test io-75.1 {multibyte encoding error read results in raw bytes} -setup { puts -nonewline $f A\xC0\x40 flush $f seek $f 0 - fconfigure $f -encoding utf-8 -encodingprofile tcl8 -buffering none + fconfigure $f -encoding utf-8 -profile tcl8 -buffering none } -body { set d [read $f] binary scan $d H* hd @@ -9083,10 +9083,10 @@ test io-75.1 {multibyte encoding error read results in raw bytes} -setup { removeFile io-75.1 } -result 41c040 -test io-75.2 {unrepresentable character write passes and is replaced by ? (-encodingprofile tcl8)} -setup { +test io-75.2 {unrepresentable character write passes and is replaced by ? (-profile tcl8)} -setup { set fn [makeFile {} io-75.2] set f [open $fn w+] - fconfigure $f -encoding iso8859-1 -encodingprofile tcl8 + fconfigure $f -encoding iso8859-1 -profile tcl8 } -body { puts -nonewline $f A\u2022 flush $f @@ -9100,14 +9100,14 @@ test io-75.2 {unrepresentable character write passes and is replaced by ? (-enco # Incomplete sequence test. # This error may IMHO only be detected with the close. # But the read already returns the incomplete sequence. -test io-75.3 {incomplete multibyte encoding read is ignored (-encodingprofile tcl8)} -setup { +test io-75.3 {incomplete multibyte encoding read is ignored (-profile tcl8)} -setup { set fn [makeFile {} io-75.3] set f [open $fn w+] fconfigure $f -encoding binary puts -nonewline $f "A\xC0" flush $f seek $f 0 - fconfigure $f -encoding utf-8 -buffering none -encodingprofile tcl8 + fconfigure $f -encoding utf-8 -buffering none -profile tcl8 } -body { set d [read $f] close $f @@ -9119,7 +9119,7 @@ test io-75.3 {incomplete multibyte encoding read is ignored (-encodingprofile tc # As utf-8 has a special treatment in multi-byte decoding, also test another # one. -test io-75.4 {shiftjis encoding error read results in raw bytes (-encodingprofile tcl8)} -setup { +test io-75.4 {shiftjis encoding error read results in raw bytes (-profile tcl8)} -setup { set fn [makeFile {} io-75.4] set f [open $fn w+] fconfigure $f -encoding binary @@ -9128,7 +9128,7 @@ test io-75.4 {shiftjis encoding error read results in raw bytes (-encodingprofil puts -nonewline $f A\x81\xFFA flush $f seek $f 0 - fconfigure $f -encoding shiftjis -buffering none -eofchar "" -translation lf -encodingprofile tcl8 + fconfigure $f -encoding shiftjis -buffering none -eofchar "" -translation lf -profile tcl8 } -body { set d [read $f] binary scan $d H* hd @@ -9138,14 +9138,14 @@ test io-75.4 {shiftjis encoding error read results in raw bytes (-encodingprofil removeFile io-75.4 } -result 4181ff41 -test io-75.5 {invalid utf-8 encoding read is ignored (-encodingprofile tcl8)} -setup { +test io-75.5 {invalid utf-8 encoding read is ignored (-profile tcl8)} -setup { set fn [makeFile {} io-75.5] set f [open $fn w+] fconfigure $f -encoding binary puts -nonewline $f A\x81 flush $f seek $f 0 - fconfigure $f -encoding utf-8 -buffering none -eofchar "" -translation lf -encodingprofile tcl8 + fconfigure $f -encoding utf-8 -buffering none -eofchar "" -translation lf -profile tcl8 } -body { set d [read $f] close $f @@ -9155,7 +9155,7 @@ test io-75.5 {invalid utf-8 encoding read is ignored (-encodingprofile tcl8)} -s removeFile io-75.5 } -result 4181 -test io-75.8 {invalid utf-8 encoding eof handling (-encodingprofile strict)} -setup { +test io-75.8 {invalid utf-8 encoding eof handling (-profile strict)} -setup { set fn [makeFile {} io-75.8] set f [open $fn w+] fconfigure $f -encoding binary @@ -9163,7 +9163,7 @@ test io-75.8 {invalid utf-8 encoding eof handling (-encodingprofile strict)} -se puts -nonewline $f A\x1A\x81 flush $f seek $f 0 - fconfigure $f -encoding utf-8 -buffering none -eofchar \x1A -translation lf -encodingprofile strict + fconfigure $f -encoding utf-8 -buffering none -eofchar \x1A -translation lf -profile strict } -body { set d [read $f] binary scan $d H* hd @@ -9178,7 +9178,7 @@ test io-75.8 {invalid utf-8 encoding eof handling (-encodingprofile strict)} -se test io-75.9 {unrepresentable character write passes and is replaced by ?} -setup { set fn [makeFile {} io-75.9] set f [open $fn w+] - fconfigure $f -encoding iso8859-1 -encodingprofile strict + fconfigure $f -encoding iso8859-1 -profile strict } -body { catch {puts -nonewline $f "A\u2022"} msg flush $f @@ -9222,7 +9222,7 @@ test io-75.11 {shiftjis encoding error read results in raw bytes} -setup { puts -nonewline $f A\x81\xFFA flush $f seek $f 0 - fconfigure $f -encoding shiftjis -buffering none -eofchar "" -translation lf -encodingprofile strict + fconfigure $f -encoding shiftjis -buffering none -eofchar "" -translation lf -profile strict } -body { set d [read $f] binary scan $d H* hd @@ -9249,7 +9249,7 @@ test io-75.12 {invalid utf-8 encoding read is ignored} -setup { } -cleanup { removeFile io-75.12 } -result 4181 -test io-75.13 {invalid utf-8 encoding read is not ignored (-encodingprofile strict)} -setup { +test io-75.13 {invalid utf-8 encoding read is not ignored (-profile strict)} -setup { set fn [makeFile {} io-75.13] set f [open $fn w+] fconfigure $f -encoding binary @@ -9257,7 +9257,7 @@ test io-75.13 {invalid utf-8 encoding read is not ignored (-encodingprofile stri puts -nonewline $f "A\x81" flush $f seek $f 0 - fconfigure $f -encoding utf-8 -buffering none -eofchar "" -translation lf -encodingprofile strict + fconfigure $f -encoding utf-8 -buffering none -eofchar "" -translation lf -profile strict } -body { set d [read $f] binary scan $d H* hd diff --git a/tests/ioCmd.test b/tests/ioCmd.test index 23cd67e..aeb9f87 100644 --- a/tests/ioCmd.test +++ b/tests/ioCmd.test @@ -207,7 +207,7 @@ test iocmd-7.5 {close command} -setup { proc expectedOpts {got extra} { set basicOpts { - -blocking -buffering -buffersize -encoding -encodingprofile -eofchar -translation + -blocking -buffering -buffersize -encoding -eofchar -profile -translation } set opts [list {*}$basicOpts {*}$extra] lset opts end [string cat "or " [lindex $opts end]] @@ -240,33 +240,33 @@ test iocmd-8.7 {fconfigure command} -setup { file delete $path(test1) } -body { set f1 [open $path(test1) w] - fconfigure $f1 -translation lf -eofchar {} -encoding utf-16 -encodingprofile tcl8 + fconfigure $f1 -translation lf -eofchar {} -encoding utf-16 -profile tcl8 fconfigure $f1 } -cleanup { catch {close $f1} -} -result {-blocking 1 -buffering full -buffersize 4096 -encoding utf-16 -encodingprofile tcl8 -eofchar {} -translation lf} +} -result {-blocking 1 -buffering full -buffersize 4096 -encoding utf-16 -eofchar {} -profile tcl8 -translation lf} test iocmd-8.8 {fconfigure command} -setup { file delete $path(test1) set x {} } -body { set f1 [open $path(test1) w] fconfigure $f1 -translation lf -buffering line -buffersize 3030 \ - -eofchar {} -encoding utf-16 -encodingprofile tcl8 + -eofchar {} -encoding utf-16 -profile tcl8 lappend x [fconfigure $f1 -buffering] lappend x [fconfigure $f1] } -cleanup { catch {close $f1} -} -result {line {-blocking 1 -buffering line -buffersize 3030 -encoding utf-16 -encodingprofile tcl8 -eofchar {} -translation lf}} +} -result {line {-blocking 1 -buffering line -buffersize 3030 -encoding utf-16 -eofchar {} -profile tcl8 -translation lf}} test iocmd-8.9 {fconfigure command} -setup { file delete $path(test1) } -body { set f1 [open $path(test1) w] fconfigure $f1 -translation binary -buffering none -buffersize 4040 \ - -eofchar {} -encoding binary -encodingprofile tcl8 + -eofchar {} -encoding binary -profile tcl8 fconfigure $f1 } -cleanup { catch {close $f1} -} -result {-blocking 1 -buffering none -buffersize 4040 -encoding binary -encodingprofile tcl8 -eofchar {} -translation lf} +} -result {-blocking 1 -buffering none -buffersize 4040 -encoding binary -eofchar {} -profile tcl8 -translation lf} test iocmd-8.10 {fconfigure command} -returnCodes error -body { fconfigure a b } -result {can not find channel named "a"} @@ -378,7 +378,7 @@ test iocmd-8.21 {fconfigure command / -nocomplainencoding 0 error} -constraints } -returnCodes error -result "bad value for -nocomplainencoding: only true allowed" test iocmd-8.22 {fconfigure command / -nocomplainencoding 0, no error if -strictencoding already defined} -setup { set console stdin - set oldprofile [fconfigure $console -encodingprofile] + set oldprofile [fconfigure $console -profile] } -constraints { obsolete } -body { @@ -390,8 +390,8 @@ test iocmd-8.22 {fconfigure command / -nocomplainencoding 0, no error if -strict } -result 0 -test iocmd-8.21 {fconfigure -encodingprofile badprofile} -body { - fconfigure stdin -encodingprofile froboz +test iocmd-8.21 {fconfigure -profile badprofile} -body { + fconfigure stdin -profile froboz } -returnCodes error -result {bad profile name "froboz": must be replace, strict, or tcl8} test iocmd-9.1 {eof command} { @@ -1387,7 +1387,7 @@ test iocmd-25.1 {chan configure, cgetall, standard options} -match glob -body { close $c rename foo {} set res -} -result {{-blocking 1 -buffering full -buffersize 4096 -encoding * -encodingprofile * -eofchar {{} {}} -translation {auto *}}} +} -result {{-blocking 1 -buffering full -buffersize 4096 -encoding * -eofchar {{} {}} -profile * -translation {auto *}}} test iocmd-25.2 {chan configure, cgetall, no options} -match glob -body { set res {} proc foo {args} {oninit cget cgetall; onfinal; track; return ""} @@ -1396,7 +1396,7 @@ test iocmd-25.2 {chan configure, cgetall, no options} -match glob -body { close $c rename foo {} set res -} -result {{cgetall rc*} {-blocking 1 -buffering full -buffersize 4096 -encoding * -encodingprofile * -eofchar {{} {}} -translation {auto *}}} +} -result {{cgetall rc*} {-blocking 1 -buffering full -buffersize 4096 -encoding * -eofchar {{} {}} -profile * -translation {auto *}}} test iocmd-25.3 {chan configure, cgetall, regular result} -match glob -body { set res {} proc foo {args} { @@ -1408,7 +1408,7 @@ test iocmd-25.3 {chan configure, cgetall, regular result} -match glob -body { close $c rename foo {} set res -} -result {{cgetall rc*} {-blocking 1 -buffering full -buffersize 4096 -encoding * -encodingprofile * -eofchar {{} {}} -translation {auto *} -bar foo -snarf x}} +} -result {{cgetall rc*} {-blocking 1 -buffering full -buffersize 4096 -encoding * -eofchar {{} {}} -profile * -translation {auto *} -bar foo -snarf x}} test iocmd-25.4 {chan configure, cgetall, bad result, list of uneven length} -match glob -body { set res {} proc foo {args} { diff --git a/tests/winConsole.test b/tests/winConsole.test index 62dfbf3..f030444 100644 --- a/tests/winConsole.test +++ b/tests/winConsole.test @@ -198,7 +198,7 @@ test console-fconfigure-get-1.0 { Console get stdin configuration } -constraints {win interactive} -body { lsort [dict keys [fconfigure stdin]] -} -result {-blocking -buffering -buffersize -encoding -encodingprofile -eofchar -inputmode -translation} +} -result {-blocking -buffering -buffersize -encoding -eofchar -inputmode -profile -translation} set testnum 0 foreach {opt result} { @@ -224,7 +224,7 @@ test console-fconfigure-get-1.[incr testnum] { fconfigure -winsize } -constraints {win interactive} -body { fconfigure stdin -winsize -} -result {bad option "-winsize": should be one of -blocking, -buffering, -buffersize, -encoding, -encodingprofile, -eofchar, -translation, or -inputmode} -returnCodes error +} -result {bad option "-winsize": should be one of -blocking, -buffering, -buffersize, -encoding, -eofchar, -profile, -translation, or -inputmode} -returnCodes error ## fconfigure get stdout/stderr foreach chan {stdout stderr} major {2 3} { @@ -232,7 +232,7 @@ foreach chan {stdout stderr} major {2 3} { win interactive } -body { lsort [dict keys [fconfigure $chan]] - } -result {-blocking -buffering -buffersize -encoding -encodingprofile -eofchar -translation -winsize} + } -result {-blocking -buffering -buffersize -encoding -eofchar -profile -translation -winsize} set testnum 0 foreach {opt result} { -blocking 1 @@ -260,7 +260,7 @@ foreach chan {stdout stderr} major {2 3} { fconfigure -inputmode } -constraints {win interactive} -body { fconfigure $chan -inputmode - } -result {bad option "-inputmode": should be one of -blocking, -buffering, -buffersize, -encoding, -encodingprofile, -eofchar, -translation, or -winsize} -returnCodes error + } -result {bad option "-inputmode": should be one of -blocking, -buffering, -buffersize, -encoding, -eofchar, -profile, -translation, or -winsize} -returnCodes error } @@ -330,7 +330,7 @@ test console-fconfigure-set-1.3 { fconfigure stdin -winsize } -constraints {win interactive} -body { fconfigure stdin -winsize {10 30} -} -result {bad option "-winsize": should be one of -blocking, -buffering, -buffersize, -encoding, -encodingprofile, -eofchar, -translation, or -inputmode} -returnCodes error +} -result {bad option "-winsize": should be one of -blocking, -buffering, -buffersize, -encoding, -eofchar, -profile, -translation, or -inputmode} -returnCodes error ## fconfigure set stdout,stderr @@ -338,13 +338,13 @@ test console-fconfigure-set-2.0 { fconfigure stdout -winsize } -constraints {win interactive} -body { fconfigure stdout -winsize {10 30} -} -result {bad option "-winsize": should be one of -blocking, -buffering, -buffersize, -encoding, -encodingprofile, -eofchar, or -translation} -returnCodes error +} -result {bad option "-winsize": should be one of -blocking, -buffering, -buffersize, -encoding, -eofchar, -profile, or -translation} -returnCodes error test console-fconfigure-set-3.0 { fconfigure stderr -winsize } -constraints {win interactive} -body { fconfigure stderr -winsize {10 30} -} -result {bad option "-winsize": should be one of -blocking, -buffering, -buffersize, -encoding, -encodingprofile, -eofchar, or -translation} -returnCodes error +} -result {bad option "-winsize": should be one of -blocking, -buffering, -buffersize, -encoding, -eofchar, or -profile, -translation} -returnCodes error # Multiple threads diff --git a/tests/zlib.test b/tests/zlib.test index 0566b8b..42d9e9c 100644 --- a/tests/zlib.test +++ b/tests/zlib.test @@ -292,7 +292,7 @@ test zlib-8.6 {transformation and fconfigure} -setup { } -cleanup { catch {close $fd} removeFile $file -} -result {{-blocking 1 -buffering full -buffersize 4096 -encoding binary -encodingprofile {} -eofchar {} -translation lf} {-blocking 1 -buffering full -buffersize 4096 -encoding binary -encodingprofile {} -eofchar {} -translation lf -checksum 1 -dictionary {}} {-blocking 1 -buffering full -buffersize 4096 -encoding binary -encodingprofile {} -eofchar {} -translation lf}} +} -result {{-blocking 1 -buffering full -buffersize 4096 -encoding binary -eofchar {} -profile tcl8 -translation lf} {-blocking 1 -buffering full -buffersize 4096 -encoding binary -eofchar {} -profile tcl8 -translation lf -checksum 1 -dictionary {}} {-blocking 1 -buffering full -buffersize 4096 -encoding binary -eofchar {} -profile tcl8 -translation lf}} test zlib-8.7 {transformation and fconfigure} -setup { set file [makeFile {} test.gz] set fd [open $file wb] @@ -302,7 +302,7 @@ test zlib-8.7 {transformation and fconfigure} -setup { } -cleanup { catch {close $fd} removeFile $file -} -result {{-blocking 1 -buffering full -buffersize 4096 -encoding binary -encodingprofile {} -eofchar {} -translation lf} {-blocking 1 -buffering full -buffersize 4096 -encoding binary -encodingprofile {} -eofchar {} -translation lf -checksum 0} {-blocking 1 -buffering full -buffersize 4096 -encoding binary -encodingprofile {} -eofchar {} -translation lf}} +} -result {{-blocking 1 -buffering full -buffersize 4096 -encoding binary -eofchar {} -profile tcl8 -translation lf} {-blocking 1 -buffering full -buffersize 4096 -encoding binary -eofchar {} -profile tcl8 -translation lf -checksum 0} {-blocking 1 -buffering full -buffersize 4096 -encoding binary -eofchar {} -profile tcl8 -translation lf}} # Input is headers from fetching SPDY draft # Dictionary is that which is proposed _in_ SPDY draft set spdyHeaders "HTTP/1.0 200 OK\r\nContent-Type: text/html; charset=utf-8\r\nX-Robots-Tag: noarchive\r\nLast-Modified: Tue, 05 Jun 2012 02:43:25 GMT\r\nETag: \"1338864205129|#public|0|en|||0\"\r\nExpires: Tue, 05 Jun 2012 16:17:11 GMT\r\nDate: Tue, 05 Jun 2012 16:17:06 GMT\r\nCache-Control: public, max-age=5\r\nX-Content-Type-Options: nosniff\r\nX-XSS-Protection: 1; mode=block\r\nServer: GSE\r\n" -- cgit v0.12 From 4d978a00ede12fc08aeddf81f4c936d533f7c9f3 Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Thu, 16 Mar 2023 03:48:45 +0000 Subject: Change -encodingprofile to -profile --- generic/tclIO.c | 56 +++++++++++++++++++++++++-------------------------- tests/chanio.test | 6 +++--- tests/encoding.test | 10 ++++----- tests/io.test | 44 ++++++++++++++++++++-------------------- tests/ioCmd.test | 24 +++++++++++----------- tests/winConsole.test | 14 ++++++------- tests/zlib.test | 4 ++-- 7 files changed, 79 insertions(+), 79 deletions(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index cc0af2e..63579ee 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -7777,7 +7777,7 @@ Tcl_BadChannelOption( { if (interp != NULL) { const char *genericopt = - "blocking buffering buffersize encoding encodingprofile eofchar translation"; + "blocking buffering buffersize encoding eofchar profile translation"; const char **argv; Tcl_Size argc, i; Tcl_DString ds; @@ -7918,7 +7918,7 @@ Tcl_GetChannelOption( return TCL_OK; } } - if (len == 0 || HaveOpt(8, "-encoding")) { + if (len == 0 || HaveOpt(2, "-encoding")) { if (len == 0) { Tcl_DStringAppendElement(dsPtr, "-encoding"); } @@ -7932,11 +7932,25 @@ Tcl_GetChannelOption( return TCL_OK; } } - if (len == 0 || HaveOpt(9, "-encodingprofile")) { + if (len == 0 || HaveOpt(2, "-eofchar")) { + char buf[4] = ""; + if (len == 0) { + Tcl_DStringAppendElement(dsPtr, "-eofchar"); + } + if ((flags & TCL_READABLE) && (statePtr->inEofChar != 0)) { + sprintf(buf, "%c", statePtr->inEofChar); + } + if (len > 0) { + Tcl_DStringAppend(dsPtr, buf, TCL_INDEX_NONE); + return TCL_OK; + } + Tcl_DStringAppendElement(dsPtr, buf); + } + if (len == 0 || HaveOpt(1, "-profile")) { int profile; const char *profileName; if (len == 0) { - Tcl_DStringAppendElement(dsPtr, "-encodingprofile"); + Tcl_DStringAppendElement(dsPtr, "-profile"); } /* Note currently input and output profiles are same */ profile = TCL_ENCODING_PROFILE_GET(statePtr->inputEncodingFlags); @@ -7949,20 +7963,6 @@ Tcl_GetChannelOption( return TCL_OK; } } - if (len == 0 || HaveOpt(2, "-eofchar")) { - char buf[4] = ""; - if (len == 0) { - Tcl_DStringAppendElement(dsPtr, "-eofchar"); - } - if ((flags & TCL_READABLE) && (statePtr->inEofChar != 0)) { - sprintf(buf, "%c", statePtr->inEofChar); - } - if (len > 0) { - Tcl_DStringAppend(dsPtr, buf, TCL_INDEX_NONE); - return TCL_OK; - } - Tcl_DStringAppendElement(dsPtr, buf); - } if (len == 0 || HaveOpt(1, "-translation")) { if (len == 0) { Tcl_DStringAppendElement(dsPtr, "-translation"); @@ -8142,7 +8142,7 @@ Tcl_SetChannelOption( } Tcl_SetChannelBufferSize(chan, newBufferSize); return TCL_OK; - } else if (HaveOpt(8, "-encoding")) { + } else if (HaveOpt(2, "-encoding")) { Tcl_Encoding encoding; int profile; @@ -8178,15 +8178,6 @@ Tcl_SetChannelOption( ResetFlag(statePtr, CHANNEL_NEED_MORE_DATA|CHANNEL_ENCODING_ERROR); UpdateInterest(chanPtr); return TCL_OK; - } else if (HaveOpt(9, "-encodingprofile")) { - int profile; - if (TclEncodingProfileNameToId(interp, newValue, &profile) != TCL_OK) { - return TCL_ERROR; - } - TCL_ENCODING_PROFILE_SET(statePtr->inputEncodingFlags, profile); - TCL_ENCODING_PROFILE_SET(statePtr->outputEncodingFlags, profile); - ResetFlag(statePtr, CHANNEL_NEED_MORE_DATA|CHANNEL_ENCODING_ERROR); - return TCL_OK; } else if (HaveOpt(2, "-eofchar")) { if (!newValue[0] || (!(newValue[0] & 0x80) && (!newValue[1] #ifndef TCL_NO_DEPRECATED @@ -8221,6 +8212,15 @@ Tcl_SetChannelOption( ResetFlag(statePtr, CHANNEL_EOF|CHANNEL_STICKY_EOF|CHANNEL_BLOCKED); statePtr->inputEncodingFlags &= ~TCL_ENCODING_END; return TCL_OK; + } else if (HaveOpt(1, "-profile")) { + int profile; + if (TclEncodingProfileNameToId(interp, newValue, &profile) != TCL_OK) { + return TCL_ERROR; + } + TCL_ENCODING_PROFILE_SET(statePtr->inputEncodingFlags, profile); + TCL_ENCODING_PROFILE_SET(statePtr->outputEncodingFlags, profile); + ResetFlag(statePtr, CHANNEL_NEED_MORE_DATA|CHANNEL_ENCODING_ERROR); + return TCL_OK; } else if (HaveOpt(1, "-translation")) { const char *readMode, *writeMode; diff --git a/tests/chanio.test b/tests/chanio.test index 2caae44..ee6133e 100644 --- a/tests/chanio.test +++ b/tests/chanio.test @@ -254,7 +254,7 @@ test chan-io-3.3 {WriteChars: compatibility with WriteBytes: flush on line} -bod test chan-io-3.4 {WriteChars: loop over stage buffer} -body { # stage buffer maps to more than can be queued at once. set f [open $path(test1) w] - chan configure $f -encoding jis0208 -buffersize 16 -encodingprofile tcl8 + chan configure $f -encoding jis0208 -buffersize 16 -profile tcl8 chan puts -nonewline $f "\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\" set x [list [contents $path(test1)]] chan close $f @@ -267,7 +267,7 @@ test chan-io-3.5 {WriteChars: saved != 0} -body { # be moved to beginning of next channel buffer to preserve requested # buffersize. set f [open $path(test1) w] - chan configure $f -encoding jis0208 -buffersize 17 -encodingprofile tcl8 + chan configure $f -encoding jis0208 -buffersize 17 -profile tcl8 chan puts -nonewline $f "\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\" set x [list [contents $path(test1)]] chan close $f @@ -300,7 +300,7 @@ test chan-io-3.7 {WriteChars: (bufPtr->nextAdded > bufPtr->length)} -body { # on flush. The truncated bytes are moved to the beginning of the next # channel buffer. set f [open $path(test1) w] - chan configure $f -encoding jis0208 -buffersize 17 -encodingprofile tcl8 + chan configure $f -encoding jis0208 -buffersize 17 -profile tcl8 chan puts -nonewline $f "\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\" set x [list [contents $path(test1)]] chan close $f diff --git a/tests/encoding.test b/tests/encoding.test index db5680d..5224225 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -106,13 +106,13 @@ test encoding-3.2 {Tcl_GetEncodingName, non-null} -setup { } -cleanup { fconfigure stdout -encoding $old } -result {jis0208} -test encoding-3.3 {fconfigure -encodingprofile} -setup { - set old [fconfigure stdout -encodingprofile] +test encoding-3.3 {fconfigure -profile} -setup { + set old [fconfigure stdout -profile] } -body { - fconfigure stdout -encodingprofile replace - fconfigure stdout -encodingprofile + fconfigure stdout -profile replace + fconfigure stdout -profile } -cleanup { - fconfigure stdout -encodingprofile $old + fconfigure stdout -profile $old } -result replace test encoding-4.1 {Tcl_GetEncodingNames} -constraints {testencoding} -setup { diff --git a/tests/io.test b/tests/io.test index e9f4bae..538f554 100644 --- a/tests/io.test +++ b/tests/io.test @@ -339,7 +339,7 @@ test io-3.4 {WriteChars: loop over stage buffer} -body { # stage buffer maps to more than can be queued at once. set f [open $path(test1) w] - fconfigure $f -encoding jis0208 -buffersize 16 -encodingprofile tcl8 + fconfigure $f -encoding jis0208 -buffersize 16 -profile tcl8 puts -nonewline $f "\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\" set x [list [contents $path(test1)]] close $f @@ -353,7 +353,7 @@ test io-3.5 {WriteChars: saved != 0} -body { # requested buffersize. set f [open $path(test1) w] - fconfigure $f -encoding jis0208 -buffersize 17 -encodingprofile tcl8 + fconfigure $f -encoding jis0208 -buffersize 17 -profile tcl8 puts -nonewline $f "\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\" set x [list [contents $path(test1)]] close $f @@ -386,7 +386,7 @@ test io-3.7 {WriteChars: (bufPtr->nextAdded > bufPtr->length)} -body { # of the next channel buffer. set f [open $path(test1) w] - fconfigure $f -encoding jis0208 -buffersize 17 -encodingprofile tcl8 + fconfigure $f -encoding jis0208 -buffersize 17 -profile tcl8 puts -nonewline $f "\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\" set x [list [contents $path(test1)]] close $f @@ -7700,7 +7700,7 @@ test io-52.20 {TclCopyChannel & encodings} -setup { set out [open $path(kyrillic.txt) w] # Using "-encoding ascii" means reading the "Á" gives an error - fconfigure $in -encoding ascii -encodingprofile strict + fconfigure $in -encoding ascii -profile strict fconfigure $out -encoding koi8-r -translation lf fcopy $in $out @@ -7722,7 +7722,7 @@ test io-52.21 {TclCopyChannel & encodings} -setup { # Using "-encoding ascii" means writing the "Á" gives an error fconfigure $in -encoding utf-8 - fconfigure $out -encoding ascii -translation lf -encodingprofile strict + fconfigure $out -encoding ascii -translation lf -profile strict fcopy $in $out } -cleanup { @@ -7742,7 +7742,7 @@ test io-52.22 {TclCopyChannel & encodings} -setup { set out [open $path(kyrillic.txt) w] # Using "-encoding ascii" means reading the "Á" gives an error - fconfigure $in -encoding ascii -encodingprofile strict + fconfigure $in -encoding ascii -profile strict fconfigure $out -encoding koi8-r -translation lf proc ::xxx args { set ::s0 $args @@ -7770,7 +7770,7 @@ test io-52.23 {TclCopyChannel & encodings} -setup { # Using "-encoding ascii" means writing the "Á" gives an error fconfigure $in -encoding utf-8 - fconfigure $out -encoding ascii -translation lf -encodingprofile strict + fconfigure $out -encoding ascii -translation lf -profile strict proc ::xxx args { set ::s0 $args } @@ -9136,7 +9136,7 @@ test io-75.1 {multibyte encoding error read results in raw bytes (-nocomplainenc puts -nonewline $f A\xC0\x40 flush $f seek $f 0 - fconfigure $f -encoding utf-8 -encodingprofile tcl8 -buffering none + fconfigure $f -encoding utf-8 -profile tcl8 -buffering none } -body { set d [read $f] binary scan $d H* hd @@ -9146,10 +9146,10 @@ test io-75.1 {multibyte encoding error read results in raw bytes (-nocomplainenc removeFile io-75.1 } -result 41c040 -test io-75.2 {unrepresentable character write passes and is replaced by ? (-encodingprofile tcl8)} -setup { +test io-75.2 {unrepresentable character write passes and is replaced by ? (-profile tcl8)} -setup { set fn [makeFile {} io-75.2] set f [open $fn w+] - fconfigure $f -encoding iso8859-1 -encodingprofile tcl8 + fconfigure $f -encoding iso8859-1 -profile tcl8 } -body { puts -nonewline $f A\u2022 flush $f @@ -9163,14 +9163,14 @@ test io-75.2 {unrepresentable character write passes and is replaced by ? (-enco # Incomplete sequence test. # This error may IMHO only be detected with the close. # But the read already returns the incomplete sequence. -test io-75.3 {incomplete multibyte encoding read is ignored (-encodingprofile tcl8)} -setup { +test io-75.3 {incomplete multibyte encoding read is ignored (-profile tcl8)} -setup { set fn [makeFile {} io-75.3] set f [open $fn w+] fconfigure $f -encoding binary puts -nonewline $f "A\xC0" flush $f seek $f 0 - fconfigure $f -encoding utf-8 -buffering none -encodingprofile tcl8 + fconfigure $f -encoding utf-8 -buffering none -profile tcl8 } -body { set d [read $f] close $f @@ -9182,7 +9182,7 @@ test io-75.3 {incomplete multibyte encoding read is ignored (-encodingprofile tc # As utf-8 has a special treatment in multi-byte decoding, also test another # one. -test io-75.4 {shiftjis encoding error read results in raw bytes (-encodingprofile tcl8)} -setup { +test io-75.4 {shiftjis encoding error read results in raw bytes (-profile tcl8)} -setup { set fn [makeFile {} io-75.4] set f [open $fn w+] fconfigure $f -encoding binary @@ -9191,7 +9191,7 @@ test io-75.4 {shiftjis encoding error read results in raw bytes (-encodingprofil puts -nonewline $f A\x81\xFFA flush $f seek $f 0 - fconfigure $f -encoding shiftjis -buffering none -eofchar "" -translation lf -encodingprofile tcl8 + fconfigure $f -encoding shiftjis -buffering none -eofchar "" -translation lf -profile tcl8 } -body { set d [read $f] binary scan $d H* hd @@ -9201,14 +9201,14 @@ test io-75.4 {shiftjis encoding error read results in raw bytes (-encodingprofil removeFile io-75.4 } -result 4181ff41 -test io-75.5 {invalid utf-8 encoding read is ignored (-encodingprofile tcl8)} -setup { +test io-75.5 {invalid utf-8 encoding read is ignored (-profile tcl8)} -setup { set fn [makeFile {} io-75.5] set f [open $fn w+] fconfigure $f -encoding binary puts -nonewline $f A\x81 flush $f seek $f 0 - fconfigure $f -encoding utf-8 -buffering none -eofchar "" -translation lf -encodingprofile tcl8 + fconfigure $f -encoding utf-8 -buffering none -eofchar "" -translation lf -profile tcl8 } -body { set d [read $f] close $f @@ -9218,7 +9218,7 @@ test io-75.5 {invalid utf-8 encoding read is ignored (-encodingprofile tcl8)} -s removeFile io-75.5 } -result 4181 -test io-75.8 {invalid utf-8 encoding eof handling (-encodingprofile strict)} -setup { +test io-75.8 {invalid utf-8 encoding eof handling (-profile strict)} -setup { set fn [makeFile {} io-75.8] set f [open $fn w+] fconfigure $f -encoding binary @@ -9226,7 +9226,7 @@ test io-75.8 {invalid utf-8 encoding eof handling (-encodingprofile strict)} -se puts -nonewline $f A\x1A\x81 flush $f seek $f 0 - fconfigure $f -encoding utf-8 -buffering none -eofchar \x1A -translation lf -encodingprofile strict + fconfigure $f -encoding utf-8 -buffering none -eofchar \x1A -translation lf -profile strict } -body { set d [read $f] binary scan $d H* hd @@ -9241,7 +9241,7 @@ test io-75.8 {invalid utf-8 encoding eof handling (-encodingprofile strict)} -se test io-75.9 {unrepresentable character write passes and is replaced by ?} -setup { set fn [makeFile {} io-75.9] set f [open $fn w+] - fconfigure $f -encoding iso8859-1 -encodingprofile strict + fconfigure $f -encoding iso8859-1 -profile strict } -body { catch {puts -nonewline $f "A\u2022"} msg flush $f @@ -9285,7 +9285,7 @@ test io-75.11 {shiftjis encoding error read results in raw bytes} -setup { puts -nonewline $f A\x81\xFFA flush $f seek $f 0 - fconfigure $f -encoding shiftjis -buffering none -eofchar "" -translation lf -encodingprofile strict + fconfigure $f -encoding shiftjis -buffering none -eofchar "" -translation lf -profile strict } -body { set d [read $f] binary scan $d H* hd @@ -9312,7 +9312,7 @@ test io-75.12 {invalid utf-8 encoding read is ignored} -setup { } -cleanup { removeFile io-75.12 } -result 4181 -test io-75.13 {invalid utf-8 encoding read is not ignored (-encodingprofile strict)} -setup { +test io-75.13 {invalid utf-8 encoding read is not ignored (-profile strict)} -setup { set fn [makeFile {} io-75.13] set f [open $fn w+] fconfigure $f -encoding binary @@ -9320,7 +9320,7 @@ test io-75.13 {invalid utf-8 encoding read is not ignored (-encodingprofile stri puts -nonewline $f "A\x81" flush $f seek $f 0 - fconfigure $f -encoding utf-8 -buffering none -eofchar "" -translation lf -encodingprofile strict + fconfigure $f -encoding utf-8 -buffering none -eofchar "" -translation lf -profile strict } -body { set d [read $f] binary scan $d H* hd diff --git a/tests/ioCmd.test b/tests/ioCmd.test index a1ec571..1ac5ca7 100644 --- a/tests/ioCmd.test +++ b/tests/ioCmd.test @@ -207,7 +207,7 @@ test iocmd-7.5 {close command} -setup { proc expectedOpts {got extra} { set basicOpts { - -blocking -buffering -buffersize -encoding -encodingprofile -eofchar -translation + -blocking -buffering -buffersize -encoding -eofchar -profile -translation } set opts [list {*}$basicOpts {*}$extra] lset opts end [string cat "or " [lindex $opts end]] @@ -240,33 +240,33 @@ test iocmd-8.7 {fconfigure command} -setup { file delete $path(test1) } -body { set f1 [open $path(test1) w] - fconfigure $f1 -translation lf -eofchar {} -encoding utf-16 + fconfigure $f1 -translation lf -eofchar {} -encoding utf-16 -profile tcl8 fconfigure $f1 } -cleanup { catch {close $f1} -} -result {-blocking 1 -buffering full -buffersize 4096 -encoding utf-16 -encodingprofile tcl8 -eofchar {} -translation lf} +} -result {-blocking 1 -buffering full -buffersize 4096 -encoding utf-16 -eofchar {} -profile tcl8 -translation lf} test iocmd-8.8 {fconfigure command} -setup { file delete $path(test1) set x {} } -body { set f1 [open $path(test1) w] fconfigure $f1 -translation lf -buffering line -buffersize 3030 \ - -eofchar {} -encoding utf-16 -encodingprofile tcl8 + -eofchar {} -encoding utf-16 -profile tcl8 lappend x [fconfigure $f1 -buffering] lappend x [fconfigure $f1] } -cleanup { catch {close $f1} -} -result {line {-blocking 1 -buffering line -buffersize 3030 -encoding utf-16 -encodingprofile tcl8 -eofchar {} -translation lf}} +} -result {line {-blocking 1 -buffering line -buffersize 3030 -encoding utf-16 -eofchar {} -profile tcl8 -translation lf}} test iocmd-8.9 {fconfigure command} -setup { file delete $path(test1) } -body { set f1 [open $path(test1) w] fconfigure $f1 -translation binary -buffering none -buffersize 4040 \ - -eofchar {} -encoding binary + -eofchar {} -encoding binary -profile tcl8 fconfigure $f1 } -cleanup { catch {close $f1} -} -result {-blocking 1 -buffering none -buffersize 4040 -encoding binary -encodingprofile tcl8 -eofchar {} -translation lf} +} -result {-blocking 1 -buffering none -buffersize 4040 -encoding binary -eofchar {} -profile tcl8 -translation lf} test iocmd-8.10 {fconfigure command} -returnCodes error -body { fconfigure a b } -result {can not find channel named "a"} @@ -369,8 +369,8 @@ test iocmd-8.20 {fconfigure command / win console channel} -constraints {nonPort # TODO: Test parsing of serial channel options (nonPortable, since requires an # open channel to work with). -test iocmd-8.21 {fconfigure -encodingprofile badprofile} -body { - fconfigure stdin -encodingprofile froboz +test iocmd-8.21 {fconfigure -profile badprofile} -body { + fconfigure stdin -profile froboz } -returnCodes error -result {bad profile name "froboz": must be replace, strict, or tcl8} test iocmd-9.1 {eof command} { @@ -1372,7 +1372,7 @@ test iocmd-25.1 {chan configure, cgetall, standard options} -match glob -body { close $c rename foo {} set res -} -result {{-blocking 1 -buffering full -buffersize 4096 -encoding * -encodingprofile * -eofchar {} -translation {auto *}}} +} -result {{-blocking 1 -buffering full -buffersize 4096 -encoding * -eofchar {} -profile * -translation {auto *}}} test iocmd-25.2 {chan configure, cgetall, no options} -match glob -body { set res {} proc foo {args} {oninit cget cgetall; onfinal; track; return ""} @@ -1381,7 +1381,7 @@ test iocmd-25.2 {chan configure, cgetall, no options} -match glob -body { close $c rename foo {} set res -} -result {{cgetall rc*} {-blocking 1 -buffering full -buffersize 4096 -encoding * -encodingprofile * -eofchar {} -translation {auto *}}} +} -result {{cgetall rc*} {-blocking 1 -buffering full -buffersize 4096 -encoding * -eofchar {} -profile * -translation {auto *}}} test iocmd-25.3 {chan configure, cgetall, regular result} -match glob -body { set res {} proc foo {args} { @@ -1393,7 +1393,7 @@ test iocmd-25.3 {chan configure, cgetall, regular result} -match glob -body { close $c rename foo {} set res -} -result {{cgetall rc*} {-blocking 1 -buffering full -buffersize 4096 -encoding * -encodingprofile * -eofchar {} -translation {auto *} -bar foo -snarf x}} +} -result {{cgetall rc*} {-blocking 1 -buffering full -buffersize 4096 -encoding * -eofchar {} -profile * -translation {auto *} -bar foo -snarf x}} test iocmd-25.4 {chan configure, cgetall, bad result, list of uneven length} -match glob -body { set res {} proc foo {args} { diff --git a/tests/winConsole.test b/tests/winConsole.test index 62dfbf3..f030444 100644 --- a/tests/winConsole.test +++ b/tests/winConsole.test @@ -198,7 +198,7 @@ test console-fconfigure-get-1.0 { Console get stdin configuration } -constraints {win interactive} -body { lsort [dict keys [fconfigure stdin]] -} -result {-blocking -buffering -buffersize -encoding -encodingprofile -eofchar -inputmode -translation} +} -result {-blocking -buffering -buffersize -encoding -eofchar -inputmode -profile -translation} set testnum 0 foreach {opt result} { @@ -224,7 +224,7 @@ test console-fconfigure-get-1.[incr testnum] { fconfigure -winsize } -constraints {win interactive} -body { fconfigure stdin -winsize -} -result {bad option "-winsize": should be one of -blocking, -buffering, -buffersize, -encoding, -encodingprofile, -eofchar, -translation, or -inputmode} -returnCodes error +} -result {bad option "-winsize": should be one of -blocking, -buffering, -buffersize, -encoding, -eofchar, -profile, -translation, or -inputmode} -returnCodes error ## fconfigure get stdout/stderr foreach chan {stdout stderr} major {2 3} { @@ -232,7 +232,7 @@ foreach chan {stdout stderr} major {2 3} { win interactive } -body { lsort [dict keys [fconfigure $chan]] - } -result {-blocking -buffering -buffersize -encoding -encodingprofile -eofchar -translation -winsize} + } -result {-blocking -buffering -buffersize -encoding -eofchar -profile -translation -winsize} set testnum 0 foreach {opt result} { -blocking 1 @@ -260,7 +260,7 @@ foreach chan {stdout stderr} major {2 3} { fconfigure -inputmode } -constraints {win interactive} -body { fconfigure $chan -inputmode - } -result {bad option "-inputmode": should be one of -blocking, -buffering, -buffersize, -encoding, -encodingprofile, -eofchar, -translation, or -winsize} -returnCodes error + } -result {bad option "-inputmode": should be one of -blocking, -buffering, -buffersize, -encoding, -eofchar, -profile, -translation, or -winsize} -returnCodes error } @@ -330,7 +330,7 @@ test console-fconfigure-set-1.3 { fconfigure stdin -winsize } -constraints {win interactive} -body { fconfigure stdin -winsize {10 30} -} -result {bad option "-winsize": should be one of -blocking, -buffering, -buffersize, -encoding, -encodingprofile, -eofchar, -translation, or -inputmode} -returnCodes error +} -result {bad option "-winsize": should be one of -blocking, -buffering, -buffersize, -encoding, -eofchar, -profile, -translation, or -inputmode} -returnCodes error ## fconfigure set stdout,stderr @@ -338,13 +338,13 @@ test console-fconfigure-set-2.0 { fconfigure stdout -winsize } -constraints {win interactive} -body { fconfigure stdout -winsize {10 30} -} -result {bad option "-winsize": should be one of -blocking, -buffering, -buffersize, -encoding, -encodingprofile, -eofchar, or -translation} -returnCodes error +} -result {bad option "-winsize": should be one of -blocking, -buffering, -buffersize, -encoding, -eofchar, -profile, or -translation} -returnCodes error test console-fconfigure-set-3.0 { fconfigure stderr -winsize } -constraints {win interactive} -body { fconfigure stderr -winsize {10 30} -} -result {bad option "-winsize": should be one of -blocking, -buffering, -buffersize, -encoding, -encodingprofile, -eofchar, or -translation} -returnCodes error +} -result {bad option "-winsize": should be one of -blocking, -buffering, -buffersize, -encoding, -eofchar, or -profile, -translation} -returnCodes error # Multiple threads diff --git a/tests/zlib.test b/tests/zlib.test index ae7dd6d..720fdd6 100644 --- a/tests/zlib.test +++ b/tests/zlib.test @@ -292,7 +292,7 @@ test zlib-8.6 {transformation and fconfigure} -setup { } -cleanup { catch {close $fd} removeFile $file -} -result {{-blocking 1 -buffering full -buffersize 4096 -encoding binary -encodingprofile tcl8 -eofchar {} -translation lf} {-blocking 1 -buffering full -buffersize 4096 -encoding binary -encodingprofile tcl8 -eofchar {} -translation lf -checksum 1 -dictionary {}} {-blocking 1 -buffering full -buffersize 4096 -encoding binary -encodingprofile tcl8 -eofchar {} -translation lf}} +} -result {{-blocking 1 -buffering full -buffersize 4096 -encoding binary -eofchar {} -profile tcl8 -translation lf} {-blocking 1 -buffering full -buffersize 4096 -encoding binary -eofchar {} -profile tcl8 -translation lf -checksum 1 -dictionary {}} {-blocking 1 -buffering full -buffersize 4096 -encoding binary -eofchar {} -profile tcl8 -translation lf}} test zlib-8.7 {transformation and fconfigure} -setup { set file [makeFile {} test.gz] set fd [open $file wb] @@ -302,7 +302,7 @@ test zlib-8.7 {transformation and fconfigure} -setup { } -cleanup { catch {close $fd} removeFile $file -} -result {{-blocking 1 -buffering full -buffersize 4096 -encoding binary -encodingprofile tcl8 -eofchar {} -translation lf} {-blocking 1 -buffering full -buffersize 4096 -encoding binary -encodingprofile tcl8 -eofchar {} -translation lf -checksum 0} {-blocking 1 -buffering full -buffersize 4096 -encoding binary -encodingprofile tcl8 -eofchar {} -translation lf}} +} -result {{-blocking 1 -buffering full -buffersize 4096 -encoding binary -eofchar {} -profile tcl8 -translation lf} {-blocking 1 -buffering full -buffersize 4096 -encoding binary -eofchar {} -profile tcl8 -translation lf -checksum 0} {-blocking 1 -buffering full -buffersize 4096 -encoding binary -eofchar {} -profile tcl8 -translation lf}} # Input is headers from fetching SPDY draft # Dictionary is that which is proposed _in_ SPDY draft set spdyHeaders "HTTP/1.0 200 OK\r\nContent-Type: text/html; charset=utf-8\r\nX-Robots-Tag: noarchive\r\nLast-Modified: Tue, 05 Jun 2012 02:43:25 GMT\r\nETag: \"1338864205129|#public|0|en|||0\"\r\nExpires: Tue, 05 Jun 2012 16:17:11 GMT\r\nDate: Tue, 05 Jun 2012 16:17:06 GMT\r\nCache-Control: public, max-age=5\r\nX-Content-Type-Options: nosniff\r\nX-XSS-Protection: 1; mode=block\r\nServer: GSE\r\n" -- cgit v0.12 From 5846b1666f9fda4d12d9cc46f8bd2050b1ed4ef4 Mon Sep 17 00:00:00 2001 From: pooryorick Date: Thu, 16 Mar 2023 08:15:03 +0000 Subject: Make valgrind_foreach target in Makefile.in properly handle interrupted tests. --- unix/Makefile.in | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/unix/Makefile.in b/unix/Makefile.in index da057d8..e092a2d 100644 --- a/unix/Makefile.in +++ b/unix/Makefile.in @@ -956,7 +956,8 @@ testresults/valgrind/%.result: ${TCL_EXE} ${TCLTEST_EXE} @mkdir -p testresults/valgrind $(SHELL_ENV) $(VALGRIND) $(VALGRINDARGS) ./${TCLTEST_EXE} \ $(TOP_DIR)/tests/all.tcl -singleproc 1 -constraints valgrind \ - -file $(basename $(notdir $@)) > $@ 2>&1 + -file $(basename $(notdir $@)) > $@.tmp 2>&1 + @mv $@.tmp $@ .PRECIOUS: testresults/valgrind/%.result @@ -966,7 +967,7 @@ testresults/valgrind/%.success: testresults/valgrind/%.result @printf '\n >&2' @status=$$(./${TCLTEST_EXE} $(TOP_DIR)/tools/valgrind_check_success \ file $(basename $@).result); \ - if [ "$$status" -eq 1 ]; then exit 0; else exit 1; fi + if [ "$$status" -eq 1 ]; then touch $@; exit 0; else exit 1; fi valgrind_each: $(addprefix testresults/valgrind/,$(addsuffix .success,$(notdir\ $(wildcard $(TOP_DIR)/tests/*.test)))) -- cgit v0.12 From 93ce80a91680899c039e65bf1c00d24045619bd9 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Thu, 16 Mar 2023 10:00:26 +0000 Subject: unicode -> utf-16. Remove some unneeded encodingProfileTodo constraints --- tests/encoding.test | 12 +----------- tests/utfext.test | 2 +- 2 files changed, 2 insertions(+), 12 deletions(-) diff --git a/tests/encoding.test b/tests/encoding.test index 5224225..407bd28 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -514,8 +514,6 @@ test encoding-16.7 {Utf32ToUtfProc} -body { test encoding-16.8 {Utf32ToUtfProc} -body { set val [encoding convertfrom -profile tcl8 utf-32 \x41\x00\x00\x41] list $val [format %x [scan $val %c]] -} -constraints { - encodingProfileTodo } -result "\uFFFD fffd" test encoding-16.9 {Utf32ToUtfProc} -constraints utf32 -body { encoding convertfrom utf-32le \x00\xD8\x00\x00 @@ -625,8 +623,6 @@ test encoding-17.12 {Utf32ToUtfProc} -body { test encoding-18.1 {TableToUtfProc on invalid input} -body { list [catch {encoding convertto jis0208 \\} res] $res -} -constraints { - encodingProfileTodo } -result {1 {unexpected character at index 0: 'U+00005C'}} test encoding-18.2 {TableToUtfProc on invalid input with -profile strict} -body { list [catch {encoding convertto -profile strict jis0208 \\} res] $res @@ -816,8 +812,6 @@ test encoding-24.18 {Parse valid or invalid utf-8} -constraints testbytestring - } -result "Z\xC3\xA0\xE2\x82\xACxxxxxx" test encoding-24.19 {Parse valid or invalid utf-8} -body { encoding convertto utf-8 "ZX\uD800" -} -constraints { - encodingProfileTodo } -returnCodes 1 -match glob -result "unexpected character at index 2: 'U+00D800'" test encoding-24.20 {Parse with -profile tcl8 but without providing encoding} -body { encoding convertfrom -profile tcl8 "\x20" @@ -875,8 +869,6 @@ test encoding-24.37 {Parse invalid utf-8 with -profile tcl8} -body { } -result \uD800 test encoding-24.38 {Try to generate invalid utf-8} -body { encoding convertto utf-8 \uD800 -} -constraints { - encodingProfileTodo } -returnCodes 1 -result {unexpected character at index 0: 'U+00D800'} test encoding-24.39 {Try to generate invalid utf-8 with -profile strict} -body { encoding convertto -profile strict utf-8 \uD800 @@ -1096,9 +1088,7 @@ test encoding-bug-183a1adcc0-4 {Bug [183a1adcc0] Buffer overflow Tcl_UtfToExtern } -result [list 0 [list nospace {} \x00\x00\xff]] test encoding-bug-183a1adcc0-5 {Bug [183a1adcc0] Buffer overflow Tcl_UtfToExternal} -constraints { - testencoding -} -constraints { - knownBug + testencoding ucs2 knownBug } -body { # The knownBug constraint is because test depends on TCL_UTF_MAX and # also UtfToUtf16 assumes space required in destination buffer is diff --git a/tests/utfext.test b/tests/utfext.test index 4ceb72f..175e3fa 100644 --- a/tests/utfext.test +++ b/tests/utfext.test @@ -85,7 +85,7 @@ foreach {enc utfhex hex} $utfExtMap { # Test for insufficient space test xx-bufferoverflow {buffer overflow Tcl_ExternalToUtf} -body { - testencoding Tcl_UtfToExternal unicode A {start end} {} 1 + testencoding Tcl_UtfToExternal utf-16 A {start end} {} 1 } -result [list nospace {} \xFF] ::tcltest::cleanupTests -- cgit v0.12 From e019e5bd1d3ddb51539ca9e4872e6c9d310dd390 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Thu, 16 Mar 2023 11:29:14 +0000 Subject: Fix (minor) warning on 32-bit platforms --- generic/tclTest.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/generic/tclTest.c b/generic/tclTest.c index 668a05a..15eaa56 100644 --- a/generic/tclTest.c +++ b/generic/tclTest.c @@ -2119,7 +2119,7 @@ static int UtfExtWrapper( /* Assumes state is integer if not "" */ Tcl_WideInt wide; if (Tcl_GetWideIntFromObj(interp, objv[5], &wide) == TCL_OK) { - encState = (Tcl_EncodingState) wide; + encState = (Tcl_EncodingState)(size_t)wide; encStatePtr = &encState; } else if (Tcl_GetCharLength(objv[5]) == 0) { encStatePtr = NULL; @@ -2209,7 +2209,7 @@ static int UtfExtWrapper( } result = TCL_OK; resultObjs[1] = - encStatePtr ? Tcl_NewWideIntObj((Tcl_WideInt)encState) : Tcl_NewObj(); + encStatePtr ? Tcl_NewWideIntObj((Tcl_WideInt)(size_t)encState) : Tcl_NewObj(); resultObjs[2] = Tcl_NewByteArrayObj(bufPtr, dstLen); if (srcReadVar) { if (Tcl_ObjSetVar2(interp, -- cgit v0.12 From 1eefaec4712a22b1cea12961deb534381158010e Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Thu, 16 Mar 2023 11:38:20 +0000 Subject: Minor cleanup (Thanks, Gustaf!) --- generic/tclLiteral.c | 4 ++-- generic/tclProc.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/generic/tclLiteral.c b/generic/tclLiteral.c index 24e99fc..c3f0f7d 100644 --- a/generic/tclLiteral.c +++ b/generic/tclLiteral.c @@ -179,8 +179,8 @@ TclCreateLiteral( const char *bytes, /* The start of the string. Note that this is * not a NUL-terminated string. */ size_t length, /* Number of bytes in the string. */ - size_t hash, /* The string's hash. If -1, it will be - * computed here. */ + size_t hash, /* The string's hash. If the value is + * TCL_INDEX_NONE, it will be computed here. */ int *newPtr, Namespace *nsPtr, int flags, diff --git a/generic/tclProc.c b/generic/tclProc.c index c8a304a..a472a2d 100644 --- a/generic/tclProc.c +++ b/generic/tclProc.c @@ -1298,7 +1298,7 @@ InitLocalCache( *namePtr = NULL; } else { *namePtr = TclCreateLiteral(iPtr, localPtr->name, - localPtr->nameLength, /* hash */ (size_t) -1, + localPtr->nameLength, /* hash */ TCL_INDEX_NONE, &isNew, /* nsPtr */ NULL, 0, NULL); Tcl_IncrRefCount(*namePtr); } -- cgit v0.12 From 1a6f1d5c40570e83189a91e4301d9e89369ce00e Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Thu, 16 Mar 2023 15:12:17 +0000 Subject: Add some undocumented stub functions. Those can prevent a crash like [http://paste.tclers.tk/5763|this] example, when compiled with 8.7 headers but running it in Tcl 8.6. --- generic/tcl.decls | 16 +++++++++++++-- generic/tclDecls.h | 30 +++++++++++++++++++--------- generic/tclPlatDecls.h | 19 ++++++++++-------- generic/tclStubInit.c | 54 +++++++++++++++++++++++++++++++++++++++++++++----- 4 files changed, 95 insertions(+), 24 deletions(-) diff --git a/generic/tcl.decls b/generic/tcl.decls index d20a945..7f734c6 100644 --- a/generic/tcl.decls +++ b/generic/tcl.decls @@ -2326,6 +2326,18 @@ declare 630 { # ----- BASELINE -- FOR -- 8.6.0 ----- # +# TIP #481 (undocumented stub entries) +declare 651 { + char *TclGetStringFromObj_(Tcl_Obj *objPtr, size_t *lengthPtr) +} +declare 652 { + unsigned short *TclGetUnicodeFromObj_(Tcl_Obj *objPtr, size_t *lengthPtr) +} +# Only available in Tcl 8.x, NULL in Tcl 9.0 +declare 653 { + unsigned char *TclGetByteArrayFromObj_(Tcl_Obj *objPtr, size_t *numBytesPtr) +} + declare 687 { void TclUnusedStubEntry(void) } @@ -2355,7 +2367,7 @@ declare 1 win { char *Tcl_WinTCharToUtf(const TCHAR *str, int len, Tcl_DString *dsPtr) } declare 3 win { - void TclUnusedStubEntry(void) + void TclWinConvertError_(unsigned errCode) } ################################ @@ -2372,7 +2384,7 @@ declare 1 macosx { int hasResourceFile, int maxPathLen, char *libraryPath) } declare 2 macosx { - void TclUnusedStubEntry(void) + void TclMacOSXNotifierAddRunLoopMode_(const void *runLoopMode) } ############################################################################## diff --git a/generic/tclDecls.h b/generic/tclDecls.h index 6c109de..551a5b6 100644 --- a/generic/tclDecls.h +++ b/generic/tclDecls.h @@ -1835,9 +1835,15 @@ EXTERN void Tcl_ZlibStreamSetCompressionDictionary( /* Slot 648 is reserved */ /* Slot 649 is reserved */ /* Slot 650 is reserved */ -/* Slot 651 is reserved */ -/* Slot 652 is reserved */ -/* Slot 653 is reserved */ +/* 651 */ +EXTERN char * TclGetStringFromObj_(Tcl_Obj *objPtr, + size_t *lengthPtr); +/* 652 */ +EXTERN unsigned short * TclGetUnicodeFromObj_(Tcl_Obj *objPtr, + size_t *lengthPtr); +/* 653 */ +EXTERN unsigned char * TclGetByteArrayFromObj_(Tcl_Obj *objPtr, + size_t *numBytesPtr); /* Slot 654 is reserved */ /* Slot 655 is reserved */ /* Slot 656 is reserved */ @@ -2559,9 +2565,9 @@ typedef struct TclStubs { void (*reserved648)(void); void (*reserved649)(void); void (*reserved650)(void); - void (*reserved651)(void); - void (*reserved652)(void); - void (*reserved653)(void); + char * (*tclGetStringFromObj_) (Tcl_Obj *objPtr, size_t *lengthPtr); /* 651 */ + unsigned short * (*tclGetUnicodeFromObj_) (Tcl_Obj *objPtr, size_t *lengthPtr); /* 652 */ + unsigned char * (*tclGetByteArrayFromObj_) (Tcl_Obj *objPtr, size_t *numBytesPtr); /* 653 */ void (*reserved654)(void); void (*reserved655)(void); void (*reserved656)(void); @@ -3908,9 +3914,12 @@ extern const TclStubs *tclStubsPtr; /* Slot 648 is reserved */ /* Slot 649 is reserved */ /* Slot 650 is reserved */ -/* Slot 651 is reserved */ -/* Slot 652 is reserved */ -/* Slot 653 is reserved */ +#define TclGetStringFromObj_ \ + (tclStubsPtr->tclGetStringFromObj_) /* 651 */ +#define TclGetUnicodeFromObj_ \ + (tclStubsPtr->tclGetUnicodeFromObj_) /* 652 */ +#define TclGetByteArrayFromObj_ \ + (tclStubsPtr->tclGetByteArrayFromObj_) /* 653 */ /* Slot 654 is reserved */ /* Slot 655 is reserved */ /* Slot 656 is reserved */ @@ -3984,6 +3993,9 @@ extern const TclStubs *tclStubsPtr; #undef Tcl_SeekOld #undef Tcl_TellOld +#undef TclGetStringFromObj_ +#undef TclGetUnicodeFromObj_ +#undef TclGetByteArrayFromObj_ #undef Tcl_PkgPresent #define Tcl_PkgPresent(interp, name, version, exact) \ diff --git a/generic/tclPlatDecls.h b/generic/tclPlatDecls.h index cb420fd..46181a1 100644 --- a/generic/tclPlatDecls.h +++ b/generic/tclPlatDecls.h @@ -59,7 +59,7 @@ EXTERN char * Tcl_WinTCharToUtf(const TCHAR *str, int len, Tcl_DString *dsPtr); /* Slot 2 is reserved */ /* 3 */ -EXTERN void TclUnusedStubEntry(void); +EXTERN void TclWinConvertError_(unsigned errCode); #endif /* WIN */ #ifdef MAC_OSX_TCL /* MACOSX */ /* 0 */ @@ -73,7 +73,8 @@ EXTERN int Tcl_MacOSXOpenVersionedBundleResources( int hasResourceFile, int maxPathLen, char *libraryPath); /* 2 */ -EXTERN void TclUnusedStubEntry(void); +EXTERN void TclMacOSXNotifierAddRunLoopMode_( + const void *runLoopMode); #endif /* MACOSX */ typedef struct TclPlatStubs { @@ -84,12 +85,12 @@ typedef struct TclPlatStubs { TCHAR * (*tcl_WinUtfToTChar) (const char *str, int len, Tcl_DString *dsPtr); /* 0 */ char * (*tcl_WinTCharToUtf) (const TCHAR *str, int len, Tcl_DString *dsPtr); /* 1 */ void (*reserved2)(void); - void (*tclUnusedStubEntry) (void); /* 3 */ + void (*tclWinConvertError_) (unsigned errCode); /* 3 */ #endif /* WIN */ #ifdef MAC_OSX_TCL /* MACOSX */ int (*tcl_MacOSXOpenBundleResources) (Tcl_Interp *interp, const char *bundleName, int hasResourceFile, int maxPathLen, char *libraryPath); /* 0 */ int (*tcl_MacOSXOpenVersionedBundleResources) (Tcl_Interp *interp, const char *bundleName, const char *bundleVersion, int hasResourceFile, int maxPathLen, char *libraryPath); /* 1 */ - void (*tclUnusedStubEntry) (void); /* 2 */ + void (*tclMacOSXNotifierAddRunLoopMode_) (const void *runLoopMode); /* 2 */ #endif /* MACOSX */ } TclPlatStubs; @@ -111,16 +112,16 @@ extern const TclPlatStubs *tclPlatStubsPtr; #define Tcl_WinTCharToUtf \ (tclPlatStubsPtr->tcl_WinTCharToUtf) /* 1 */ /* Slot 2 is reserved */ -#define TclUnusedStubEntry \ - (tclPlatStubsPtr->tclUnusedStubEntry) /* 3 */ +#define TclWinConvertError_ \ + (tclPlatStubsPtr->tclWinConvertError_) /* 3 */ #endif /* WIN */ #ifdef MAC_OSX_TCL /* MACOSX */ #define Tcl_MacOSXOpenBundleResources \ (tclPlatStubsPtr->tcl_MacOSXOpenBundleResources) /* 0 */ #define Tcl_MacOSXOpenVersionedBundleResources \ (tclPlatStubsPtr->tcl_MacOSXOpenVersionedBundleResources) /* 1 */ -#define TclUnusedStubEntry \ - (tclPlatStubsPtr->tclUnusedStubEntry) /* 2 */ +#define TclMacOSXNotifierAddRunLoopMode_ \ + (tclPlatStubsPtr->tclMacOSXNotifierAddRunLoopMode_) /* 2 */ #endif /* MACOSX */ #endif /* defined(USE_TCL_STUBS) */ @@ -128,6 +129,8 @@ extern const TclPlatStubs *tclPlatStubsPtr; /* !END!: Do not edit above this line. */ #undef TclUnusedStubEntry +#undef TclMacOSXNotifierAddRunLoopMode_ +#undef TclWinConvertError_ #ifdef MAC_OSX_TCL /* MACOSX */ #undef Tcl_MacOSXOpenBundleResources #define Tcl_MacOSXOpenBundleResources(a,b,c,d,e) Tcl_MacOSXOpenVersionedBundleResources(a,b,NULL,c,d,e) diff --git a/generic/tclStubInit.c b/generic/tclStubInit.c index ee0412a..565dd8c 100644 --- a/generic/tclStubInit.c +++ b/generic/tclStubInit.c @@ -59,6 +59,7 @@ #define TclBN_mp_tc_or TclBN_mp_or #define TclBN_mp_tc_xor TclBN_mp_xor #define TclStaticPackage Tcl_StaticPackage +#define TclMacOSXNotifierAddRunLoopMode_ TclMacOSXNotifierAddRunLoopMode #define TclUnusedStubEntry 0 /* See bug 510001: TclSockMinimumBuffers needs plat imp */ @@ -138,12 +139,55 @@ static const char *TclGetStartupScriptFileName(void) return Tcl_GetString(path); } +#define TclGetStringFromObj_ getStringFromObj +static char * +TclGetStringFromObj_( + Tcl_Obj *objPtr, + size_t *lengthPtr) +{ + int length; + char *result = Tcl_GetStringFromObj(objPtr, &length); + *lengthPtr = (size_t)length; + return result; +} + +#define TclGetUnicodeFromObj_ getUnicodeFromObj +static unsigned short * +TclGetUnicodeFromObj_( + Tcl_Obj *objPtr, + size_t *lengthPtr) +{ + int length; + Tcl_UniChar *result = Tcl_GetUnicodeFromObj(objPtr, &length); + *lengthPtr = (size_t)length; + return result; +} + +#define TclGetByteArrayFromObj_ getByteArrayFromObj +static unsigned char * +TclGetByteArrayFromObj_( + Tcl_Obj *objPtr, + size_t *numBytesPtr) +{ + int numBytes; + unsigned char *result = Tcl_GetByteArrayFromObj(objPtr, &numBytes); + *numBytesPtr = (size_t)numBytes; + return result; +} + + #if defined(_WIN32) || defined(__CYGWIN__) #undef TclWinNToHS #define TclWinNToHS winNToHS static unsigned short TclWinNToHS(unsigned short ns) { return ntohs(ns); } +#define TclWinConvertError_ winConvertError +static void +TclWinConvertError_(unsigned errCode) { + return TclWinConvertError(errCode); +} + #endif #define TclpCreateTempFile_ TclpCreateTempFile @@ -865,12 +909,12 @@ static const TclPlatStubs tclPlatStubs = { Tcl_WinUtfToTChar, /* 0 */ Tcl_WinTCharToUtf, /* 1 */ 0, /* 2 */ - TclUnusedStubEntry, /* 3 */ + TclWinConvertError_, /* 3 */ #endif /* WIN */ #ifdef MAC_OSX_TCL /* MACOSX */ Tcl_MacOSXOpenBundleResources, /* 0 */ Tcl_MacOSXOpenVersionedBundleResources, /* 1 */ - TclUnusedStubEntry, /* 2 */ + TclMacOSXNotifierAddRunLoopMode_, /* 2 */ #endif /* MACOSX */ }; @@ -1644,9 +1688,9 @@ const TclStubs tclStubs = { 0, /* 648 */ 0, /* 649 */ 0, /* 650 */ - 0, /* 651 */ - 0, /* 652 */ - 0, /* 653 */ + TclGetStringFromObj_, /* 651 */ + TclGetUnicodeFromObj_, /* 652 */ + TclGetByteArrayFromObj_, /* 653 */ 0, /* 654 */ 0, /* 655 */ 0, /* 656 */ -- cgit v0.12 From 05262be3319aa7027310e8a53e32d0cf63f501d0 Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Thu, 16 Mar 2023 16:24:04 +0000 Subject: Update manpages in anticipation of TIP 656 --- doc/Encoding.3 | 87 +++++++++++++++------ doc/chan.n | 10 +++ doc/encoding.n | 231 ++++++++++++++++++++++++++++++++++--------------------- doc/fconfigure.n | 37 +++------ 4 files changed, 225 insertions(+), 140 deletions(-) diff --git a/doc/Encoding.3 b/doc/Encoding.3 index 9b88c11..76ea193 100644 --- a/doc/Encoding.3 +++ b/doc/Encoding.3 @@ -8,7 +8,7 @@ .so man.macros .BS .SH NAME -Tcl_GetEncoding, Tcl_FreeEncoding, Tcl_GetEncodingFromObj, Tcl_ExternalToUtfDString, Tcl_ExternalToUtf, Tcl_UtfToExternalDString, Tcl_UtfToExternal, Tcl_GetEncodingName, Tcl_SetSystemEncoding, Tcl_GetEncodingNameFromEnvironment, Tcl_GetEncodingNames, Tcl_CreateEncoding, Tcl_GetEncodingSearchPath, Tcl_SetEncodingSearchPath, Tcl_GetDefaultEncodingDir, Tcl_SetDefaultEncodingDir \- procedures for creating and using encodings +Tcl_GetEncoding, Tcl_FreeEncoding, Tcl_GetEncodingFromObj, Tcl_ExternalToUtfDString, Tcl_UtfToExternalDStringEx, Tcl_ExternalToUtf, Tcl_UtfToExternalDString, Tcl_UtfToExternalDStringEx, Tcl_UtfToExternal, Tcl_GetEncodingName, Tcl_SetSystemEncoding, Tcl_GetEncodingNameFromEnvironment, Tcl_GetEncodingNames, Tcl_CreateEncoding, Tcl_GetEncodingSearchPath, Tcl_SetEncodingSearchPath, Tcl_GetDefaultEncodingDir, Tcl_SetDefaultEncodingDir \- procedures for creating and using encodings .SH SYNOPSIS .nf \fB#include \fR @@ -26,13 +26,13 @@ char * \fBTcl_ExternalToUtfDString\fR(\fIencoding, src, srcLen, dstPtr\fR) .sp int -\fBTcl_ExternalToUtfDStringEx\fR(\fIencoding, src, srcLen, flags, dstPtr\fR) +\fBTcl_ExternalToUtfDStringEx\fR(\fIinterp, encoding, src, srcLen, flags, dstPtr, errorIdxPtr\fR) .sp char * \fBTcl_UtfToExternalDString\fR(\fIencoding, src, srcLen, dstPtr\fR) .sp int -\fBTcl_UtfToExternalDStringEx\fR(\fIencoding, src, srcLen, flags, dstPtr\fR) +\fBTcl_UtfToExternalDStringEx\fR(\fIinterp, encoding, src, srcLen, flags, dstPtr, errorIdxPtr\fR) .sp int \fBTcl_ExternalToUtf\fR(\fIinterp, encoding, src, srcLen, flags, statePtr, @@ -105,7 +105,7 @@ encoding-specific length of the string is used. Pointer to an uninitialized or free \fBTcl_DString\fR in which the converted result will be stored. .AP int flags in -Various flag bits OR-ed together. +This is a bit mask passed in to control the operation of the encoding functions. \fBTCL_ENCODING_START\fR signifies that the source buffer is the first block in a (potentially multi-block) input stream, telling the conversion routine to reset to an initial state and @@ -113,16 +113,15 @@ perform any initialization that needs to occur before the first byte is converted. \fBTCL_ENCODING_END\fR signifies that the source buffer is the last block in a (potentially multi-block) input stream, telling the conversion routine to perform any finalization that needs to occur after the last -byte is converted and then to reset to an initial state. -\fBTCL_ENCODING_STOPONERROR\fR signifies that the conversion routine should -return immediately upon reading a source character that does not exist in -the target encoding; otherwise a default fallback character will -automatically be substituted. The flag \fBTCL_ENCODING_STRICT\fR makes the -encoder/decoder more strict in what it considers to be an invalid byte -sequence. The flag \fBTCL_ENCODING_NOCOMPLAIN\fR has -no effect, it is reserved for Tcl 9.0. The flag \fBTCL_ENCODING_MODIFIED\fR makes -\fBTcl_UtfToExternalDStringEx\fR and \fBTcl_UtfToExternal\fR produce the -byte sequence \exC0\ex80 in stead of \ex00, for the utf-8/cesu-8 encoders. +byte is converted and then to reset to an initial state. The +\fBTCL_PROFILE_*\fR bits defined in the \fBPROFILES\fR section below +control the encoding profile to be used for dealing with invalid data or +other errors in the encoding transform. +\fBTCL_ENCODING_STOPONERROR\fR is present for backward compatibility with +Tcl 8.6 and forces the encoding profile to \fBstrict\fR. + +Some flags bits may not be usable with some functions as noted in the +function descriptions below. .AP Tcl_EncodingState *statePtr in/out Used when converting a (generally long or indefinite length) byte stream in a piece-by-piece fashion. The conversion routine stores its current @@ -148,6 +147,9 @@ buffer as a result of the conversion. May be NULL. .AP int *dstCharsPtr out Filled with the number of characters that correspond to the number of bytes stored in the output buffer. May be NULL. +.AP Tcl_Size *errorIdxPtr out +Filled with the index of the byte or character that caused the encoding transform +to fail. May be NULL. .AP Tcl_DString *bufPtr out Storage for the prescribed system encoding name. .AP "const Tcl_EncodingType" *typePtr in @@ -221,11 +223,30 @@ call \fBTcl_DStringFree\fR to free any information stored in \fIdstPtr\fR. When converting, if any of the characters in the source buffer cannot be represented in the target encoding, a default fallback character will be used. The return value is a pointer to the value stored in the DString. -.PP -\fBTcl_ExternalToUtfDStringEx\fR is the same as \fBTcl_ExternalToUtfDString\fR, -but it has an additional flags parameter. The return value is the index of -the first byte in the input string causing a conversion error. -Or TCL_INDEX_NONE if all is OK. + +.PP +\fBTcl_ExternalToUtfDStringEx\fR is a more flexible version of older +\fBTcl_ExternalToUtfDString\fR function. It takes three additional parameters, +\fBinterp\fR, \fBflags\fR and \fBerrorIdxPtr\fR. The \fBflags\fR parameter may +be used to specify the profile to be used for the transform. The +\fBTCL_ENCODING_START\fR and \fBTCL_ENCODING_END\fR bits in \fBflags\fR are +ignored as the function assumes the entire source string to be decoded is passed +into the function. On success, the function returns \fBTCL_ERROR\fR with the +converted string stored in \fB*dstPtr\fR. For errors other than conversion +errors, such as invalid flags, the function returns \fBTCL_OK\fR with an error +message in \fBinterp\fR if it is not NULL. + +For conversion errors, \fBTcl_ExternalToUtfDStringEx\fR returns one +of the \fBTCL_CONVERT_*\fR errors listed below for \fBTcl_ExternalToUtf\fR. +When one of these conversion errors is returned, an error message is +stored in \fBinterp\fR only if \fBerrorIdxPtr\fR is NULL. Otherwise, no error message +is stored as the function expects the caller is interested whatever is +decoded to that point and not treating this as an immediate error condition. +The index of the error location is stored in \fB*errorIdxPtr\fR. + +The caller must call \fBTcl_DStringFree\fR to free up the \fB*dstPtr\fR resources +irrespective of the return value from the function. + .PP \fBTcl_ExternalToUtf\fR converts a source buffer \fIsrc\fR from the specified \fIencoding\fR into UTF-8. Up to \fIsrcLen\fR bytes are converted from the @@ -248,12 +269,12 @@ the unconverted bytes that remained in \fIsrc\fR plus some further bytes from the source stream to properly convert the formerly split-up multibyte sequence. .IP \fBTCL_CONVERT_SYNTAX\fR 29 -The source buffer contained an invalid character sequence. This may occur +The source buffer contained an invalid byte or character sequence. This may occur if the input stream has been damaged or if the input encoding method was misidentified. .IP \fBTCL_CONVERT_UNKNOWN\fR 29 The source buffer contained a character that could not be represented in -the target encoding and \fBTCL_ENCODING_STOPONERROR\fR was specified. +the target encoding. .RE .LP \fBTcl_UtfToExternalDString\fR converts a source buffer \fIsrc\fR from UTF-8 @@ -265,10 +286,14 @@ characters in the source buffer cannot be represented in the target encoding, a default fallback character will be used. The return value is a pointer to the value stored in the DString. .PP -\fBTcl_UtfToExternalDStringEx\fR is the same as \fBTcl_UtfToExternalDString\fR, -but it has an additional flags parameter. The return value is the index of -the first byte of an utf-8 byte-sequence in the input string causing a -conversion error. Or TCL_INDEX_NONE if all is OK. +\fBTcl_UtfToExternalDStringEx\fR is an enhanced version of +\fBTcl_UtfToExternalDString\fR that transforms UTF-8 encoded source data to a specified +\fIencoding\fR. Except for the direction of the transform, the parameters and +return values are identical to those of \fBTcl_ExternalToUtfDStringEx\fR. See +that function above for details about the same. + +Irrespective of the return code from the function, the caller must free +resources associated with \fB*dstPtr\fR when the function returns. .PP \fBTcl_UtfToExternal\fR converts a source buffer \fIsrc\fR from UTF-8 into the specified \fIencoding\fR. Up to \fIsrcLen\fR bytes are converted from @@ -592,6 +617,18 @@ to the object, it will be deleted. .PP \fBTcl_GetEncodingSearchPath\fR returns an object with a reference count of at least 1. +.SH "PROFILES" +Encoding profiles define the manner in which errors in the encoding transforms +are handled by the encoding functions. An application can specify the profile +to be used by OR-ing the \fBflags\fR parameter passed to the function +with at most one of \fBTCL_ENCODING_PROFILE_TCL8\fR, +\fBTCL_ENCODING_PROFILE_STRICT\fR or \fBTCL_ENCODING_PROFILE_REPLACE\fR. +These correspond to the \fBtcl8\fR, \fBstrict\fR and \fBreplace\fR profiles +respectively. If none are specified, a version-dependent default profile is used. +For Tcl 8.7, the default profile is \fBtcl8\fR. + +For details about profiles, see the \fBPROFILES\fR section in +the documentation of the \fBencoding\fR command. .SH "SEE ALSO" encoding(n) .SH KEYWORDS diff --git a/doc/chan.n b/doc/chan.n index bf6c85c..1ecef4c 100644 --- a/doc/chan.n +++ b/doc/chan.n @@ -156,6 +156,16 @@ applied to input only. The default value is the empty string, except that under Windows the default value for reading is Control-z (\ex1A). The acceptable range is \ex01 - \ex7f. A value outside this range results in an error. +.VS "TCL8.7 TIP656" +.TP +\fB\-profile\fR \fIprofile\fR +. +Specifies the encoding profile to be used on the channel. The encoding +transforms in use for the channel's input and output will then be subject to the +rules of that profile. Any failures will result in a channel error. See +\fBPROFILES\fR in the \fBencoding(n)\fR documentation for details about encoding +profiles. +.VE "TCL8.7 TIP656" .TP \fB\-translation\fR \fItranslation\fR .TP diff --git a/doc/encoding.n b/doc/encoding.n index 4ad2824..9bb6e93 100644 --- a/doc/encoding.n +++ b/doc/encoding.n @@ -28,71 +28,41 @@ formats. Performs one of several encoding related operations, depending on \fIoption\fR. The legal \fIoption\fRs are: .TP -\fBencoding convertfrom\fR ?\fB-strict\fR? ?\fB-failindex var\fR? ?\fIencoding\fR? \fIdata\fR -\fBencoding convertfrom\fR \fB-nocomplain\fR ?\fIencoding\fR? \fIdata\fR +\fBencoding convertfrom\fR ?\fIencoding\fR? \fIdata\fR +.TP +\fBencoding convertfrom\fR ?\fB-profile \fIprofile\fR? ?\fB-failindex var\fR? \fIencoding\fR \fIdata\fR . -Convert \fIdata\fR to a Unicode string from the specified \fIencoding\fR. The -characters in \fIdata\fR are 8 bit binary data. The resulting -sequence of bytes is a string created by applying the given \fIencoding\fR -to the data. If \fIencoding\fR is not specified, the current +Converts \fIdata\fR, which should be in binary string encoded as per +\fIencoding\fR, to a Tcl string. If \fIencoding\fR is not specified, the current system encoding is used. -.VS "TCL8.7 TIP346, TIP607, TIP601" -.PP -.RS -The command does not fail on encoding errors (unless \fB-strict\fR is specified). -Instead, any not convertable bytes (like incomplete UTF-8 sequences, see example -below) are put as byte values into the output stream. -.PP -If the option \fB-failindex\fR with a variable name is given, the error reporting -is changed in the following manner: -in case of a conversion error, the position of the input byte causing the error -is returned in the given variable. The return value of the command are the -converted characters until the first error position. -In case of no error, the value \fI-1\fR is written to the variable. This option -may not be used together with \fB-nocomplain\fR. -.PP -The option \fB-nocomplain\fR has no effect, but assures to get the same result -in Tcl 9. -.PP -The \fB-strict\fR option follows more strict rules in conversion. For the \fButf-8\fR -encoder, it disallows invalid byte sequences and surrogates (which - -otherwise - are just passed through). This option may not be used together -with \fB-nocomplain\fR. -.VE "TCL8.7 TIP346, TIP607, TIP601" -.RE + +.VS "TCL8.7 TIP607, TIP656" +The \fB-profile\fR option determines the command behavior in the presence +of conversion errors. See \fBPROFILES\fR for details. Any premature +termination of processing due to errors is reported through an exception if +the \fB-failindex\fR option is not specified. + +If the \fB-failindex\fR is specified, instead of an exception being raised +on premature termination, the result of the conversion up to the point of the +error is returned as the result of the command. In addition, the index +of the source byte triggering the error is stored in \fBvar\fR. If no +errors are encountered, the entire result of the conversion is returned and +the value \fB-1\fR is stored in \fBvar\fR. +.VE "TCL8.7 TIP607, TIP656" +.TP +\fBencoding convertto\fR ?\fIencoding\fR? \fIdata\fR .TP -\fBencoding convertto\fR ?\fB-strict\fR? ?\fB-failindex var\fR? ?\fIencoding\fR? \fIdata\fR -\fBencoding convertto\fR \fB-nocomplain\fR ?\fIencoding\fR? \fIdata\fR +\fBencoding convertto\fR ?\fB-profile \fIprofile\fR? ?\fB-failindex var\fR? \fIencoding\fR \fIdata\fR . -Convert \fIstring\fR from Unicode to the specified \fIencoding\fR. -The result is a sequence of bytes that represents the converted -string. Each byte is stored in the lower 8-bits of a Unicode -character (indeed, the resulting string is a binary string as far as -Tcl is concerned, at least initially). If \fIencoding\fR is not -specified, the current system encoding is used. -.VS "TCL8.7 TIP346, TIP607, TIP601" -.PP -.RS -The command does not fail on encoding errors (unless \fB-strict\fR is specified). -Instead, the replacement character \fB?\fR is output for any not representable -character (like the dot \fB\\U2022\fR in \fBiso-8859-1\fR encoding, see example below). -.PP -If the option \fB-failindex\fR with a variable name is given, the error reporting -is changed in the following manner: -in case of a conversion error, the position of the input character causing the error -is returned in the given variable. The return value of the command are the -converted bytes until the first error position. No error condition is raised. -In case of no error, the value \fI-1\fR is written to the variable. This option -may not be used together with \fB-nocomplain\fR. -.PP -The option \fB-nocomplain\fR has no effect, but assures to get the same result -in Tcl 9. -.PP -The \fB-strict\fR option follows more strict rules in conversion. For the \fButf-8\fR -encoder, it disallows surrogates (which - otherwise - are just passed through). This -option may not be used together with \fB-nocomplain\fR. -.VE "TCL8.7 TIP346, TIP607, TIP601" -.RE +Convert \fIstring\fR to the specified \fIencoding\fR. The result is a Tcl binary +string that contains the sequence of bytes representing the converted string in +the specified encoding. If \fIencoding\fR is not specified, the current system +encoding is used. + +.VS "TCL8.7 TIP607, TIP656" +The \fB-profile\fR and \fB-failindex\fR options have the same effect as +described for the \fBencoding convertfrom\fR command. +.VE "TCL8.7 TIP607, TIP656" .TP \fBencoding dirs\fR ?\fIdirectoryList\fR? . @@ -121,55 +91,140 @@ are guaranteed to be present in the list. Set the system encoding to \fIencoding\fR. If \fIencoding\fR is omitted then the command returns the current system encoding. The system encoding is used whenever Tcl passes strings to system calls. -.SH EXAMPLE +.TP +.VS "TCL8.7 TIP656" +\fBencoding profiles\fR +Returns a list of the names of encoding profiles. See \fBPROFILES\fR below. +.VE "TCL8.7 TIP656" +\" Do not put .VS on whole section as that messes up the bullet list alignment +.SH PROFILES +.PP +.VS "TCL8.7 TIP656" +Operations involving encoding transforms may encounter several types of +errors such as invalid sequences in the source data, characters that +cannot be encoded in the target encoding and so on. +A \fIprofile\fR prescribes the strategy for dealing with such errors +in one of two ways: +.VE "TCL8.7 TIP656" +. +.IP \(bu +.VS "TCL8.7 TIP656" +Terminating further processing of the source data. The profile does not +determine how this premature termination is conveyed to the caller. By default, +this is signalled by raising an exception. If the \fB-failindex\fR option +is specified, errors are reported through that mechanism. +.VE "TCL8.7 TIP656" +.IP \(bu +.VS "TCL8.7 TIP656" +Continue further processing of the source data using a fallback strategy such +as replacing or discarding the offending bytes in a profile-defined manner. +.VE "TCL8.7 TIP656" +.PP +The following profiles are currently implemented with \fBtcl8\fR being +the default if the \fB-profile\fR is not specified. +.VS "TCL8.7 TIP656" +.TP +\fBtcl8\fR +. +The \fBtcl8\fR profile always follows the first strategy above and corresponds +to the behavior of encoding transforms in Tcl 8.6. When converting from an +external encoding \fBother than utf-8\fR to Tcl strings with the \fBencoding +convertfrom\fR command, invalid bytes are mapped to their numerically equivalent +code points. For example, the byte 0x80 which is invalid in ASCII would be +mapped to code point U+0080. When converting from \fButf-8\fR, invalid bytes +that are defined in CP1252 are mapped to their Unicode equivalents while those +that are not fall back to the numerical equivalents. For example, byte 0x80 is +defined by CP1252 and is therefore mapped to its Unicode equivalent U+20AC while +byte 0x81 which is not defined by CP1252 is mapped to U+0081. As an additional +special case, the sequence 0xC0 0x80 is mapped to U+0000. + +When converting from Tcl strings to an external encoding format using +\fBencoding convertto\fR, characters that cannot be represented in the +target encoding are replaced by an encoding-dependent character, usually +the question mark \fB?\fR. +.TP +\fBstrict\fR +. +The \fBstrict\fR profile always stops processing when an conversion error is +encountered. The error is signalled via an exception or the \fB-failindex\fR +option mechanism. The \fBstrict\fR profile implements a Unicode standard +conformant behavior. +.TP +\fBreplace\fR +. +Like the \fBtcl8\fR profile, the \fBreplace\fR profile always continues +processing on conversion errors but follows a Unicode standard conformant +method for error handling. + +When converting an encoded byte sequence to a Tcl string using +\fBencoding convertfrom\fR, invalid bytes +are replaced by the U+FFFD REPLACEMENT CHARACTER code point. + +When encoding a Tcl string with \fBencoding convertto\fR, +code points that cannot be represented in the +target encoding are transformed to an encoding-specific fallback character, +U+FFFD REPLACEMENT CHARACTER for UTF targets and generally `?` for other +encodings. +.VE "TCL8.7 TIP656" +.SH EXAMPLES +.PP +These examples use the utility proc below that prints the Unicode code points +comprising a Tcl string. +.PP +.CS +proc codepoints {s} {join [lmap c [split $s ""] { + string cat U+ [format %.6X [scan $c %c]]}] +} +.CE .PP Example 1: convert a byte sequence in Japanese euc-jp encoding to a TCL string: .PP .CS -set s [\fBencoding convertfrom\fR euc-jp "\exA4\exCF"] +% codepoints [\fBencoding convertfrom\fR euc-jp "\exA4\exCF"] +U+00306F .CE .PP -The result is the unicode codepoint: +The result is the unicode codepoint .QW "\eu306F" , which is the Hiragana letter HA. -.VS "TCL8.7 TIP346, TIP607, TIP601" +.VS "TCL8.7 TIP607, TIP656" .PP -Example 2: detect the error location in an incomplete UTF-8 sequence: +Example 2: Error handling based on profiles: .PP +The letter \fBA\fR is Unicode character U+0041 and the byte "\ex80" is invalid +in ASCII encoding. .CS -% set s [\fBencoding convertfrom\fR -failindex i utf-8 "A\exC3"] -A -% set i -1 -.CE -.PP -Example 3: return the incomplete UTF-8 sequence by raw bytes: .PP -.CS -% set s [\fBencoding convertfrom\fR -nocomplain utf-8 "A\exC3"] +% codepoints [encoding convertfrom -profile tcl8 ascii A\ex80] +U+000041 U+000080 +% codepoints [encoding convertfrom -profile replace ascii A\ex80] +U+000041 U+00FFFD +% codepoints [encoding convertfrom -profile strict ascii A\ex80] +unexpected byte sequence starting at index 1: '\ex80' .CE -The result is "A" followed by the byte \exC3. The option \fB-nocomplain\fR -has no effect, but assures to get the same result with TCL9. .PP -Example 4: detect the error location while transforming to ISO8859-1 -(ISO-Latin 1): +Example 3: Get partial data and the error location: .PP .CS -% set s [\fBencoding convertto\fR -failindex i iso8859-1 "A\eu0141"] -A -% set i -1 +% codepoints [encoding convertfrom -profile strict -failindex idx ascii AB\ex80] +U+000041 U+000042 +% set idx +2 .CE .PP -Example 5: replace a not representable character by the replacement character: +Example 4: Encode a character that is not representable in ISO8859-1: .PP .CS -% set s [\fBencoding convertto\fR -nocomplain iso8859-1 "A\eu0141"] +% encoding convertto iso8859-1 A\eu0141 A? +% encoding convertto -profile strict iso8859-1 A\eu0141 +unexpected character at index 1: 'U+000141' +% encoding convertto -profile strict -failindex idx iso8859-1 A\eu0141 +A +% set idx +1 .CE -The option \fB-nocomplain\fR has no effect, but assures to get the same result -in Tcl 9. -.VE "TCL8.7 TIP346, TIP607, TIP601" +.VE "TCL8.7 TIP607, TIP656" .PP .SH "SEE ALSO" Tcl_GetEncoding(3), fconfigure(n) diff --git a/doc/fconfigure.n b/doc/fconfigure.n index 9061161..526c5ad 100644 --- a/doc/fconfigure.n +++ b/doc/fconfigure.n @@ -101,8 +101,6 @@ locale-dependent system encoding used for interfacing with the operating system, as returned by \fBencoding system\fR. .RE .TP -\fB\-eofchar\fR \fIchar\fR -.TP \fB\-eofchar\fR \fB{\fIchar outChar\fB}\fR . This option supports DOS file systems that use Control-z (\ex1A) as an @@ -122,31 +120,16 @@ reading and the empty string for writing. The acceptable range for \fB\-eofchar\fR values is \ex01 - \ex7F; attempting to set \fB\-eofchar\fR to a value outside of this range will generate an error. -.VS "TCL8.7 TIP633" +.VS "TCL8.7 TIP656" .TP -\fB\-nocomplainencoding\fR \fIbool\fR +\fB\-profile\fR \fIprofile\fR . -Reporting mode of encoding errors. -If set to a \fItrue\fR value, encoding errors are resolved by a replacement -character (output) or verbatim bytes (input). No error is thrown. -This is the only available mode in Tcl 8.7. -.RS -.PP -Starting from TCL 9.0, this value may be set to a \fIfalse\fR value to throw errors -in case of encoding errors. -.RE -.VE "TCL8.7 TIP633" -.VS "TCL8.7 TIP346" -.TP -\fB\-strictencoding\fR \fIbool\fR -. -Activate additional stricter encoding application rules. -Default value is \fIfalse\fR. -.RS -.PP -See the \fI\-strict\fR option of the \fBencoding\fR command for more information. -.VE "TCL8.7 TIP346" -.RE +Specifies the encoding profile to be used on the channel. The encoding +transforms in use for the channel's input and output will then be subject to the +rules of that profile. Any failures will result in a channel error. See +\fBPROFILES\fR in the \fBencoding(n)\fR documentation for details about encoding +profiles. +.VE "TCL8.7 TIP656" .TP \fB\-translation\fR \fImode\fR .TP @@ -303,11 +286,11 @@ set data [read $f $numDataBytes] close $f .CE .SH "SEE ALSO" -close(n), flush(n), gets(n), open(n), puts(n), read(n), socket(n), +close(n), encoding(n), flush(n), gets(n), open(n), puts(n), read(n), socket(n), Tcl_StandardChannels(3) .SH KEYWORDS blocking, buffering, carriage return, end of line, flushing, linemode, -newline, nonblocking, platform, translation, encoding, filter, byte array, +newline, nonblocking, platform, profile, translation, encoding, filter, byte array, binary '\" Local Variables: '\" mode: nroff -- cgit v0.12 From 527e79c3a8b0d8df5cce3676a94700785584ef06 Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Thu, 16 Mar 2023 17:54:12 +0000 Subject: Fix passing of encoding state in testencoding Tcl_UtfToExternal --- generic/tclTest.c | 26 ++++++++++++++------------ tests/utfext.test | 5 +++++ 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/generic/tclTest.c b/generic/tclTest.c index eb19d18..459461f 100644 --- a/generic/tclTest.c +++ b/generic/tclTest.c @@ -2020,8 +2020,7 @@ static int UtfExtWrapper( Tcl_Interp *interp, UtfTransformFn *transformer, int objc, Tcl_Obj *const objv[]) { Tcl_Encoding encoding; - int encStateValue; /* Assumes Tcl_EncodingState points to integer!!! */ - Tcl_EncodingState encState; + Tcl_EncodingState encState, *encStatePtr; Tcl_Size srcLen, bufLen; const unsigned char *bytes; unsigned char *bufPtr; @@ -2085,13 +2084,16 @@ static int UtfExtWrapper( } /* Assumes state is integer if not "" */ - if (Tcl_GetIntFromObj(interp, objv[5], &encStateValue) == TCL_OK) { - encState = (Tcl_EncodingState)&encStateValue; + Tcl_WideInt wide; + if (Tcl_GetWideIntFromObj(interp, objv[5], &wide) == TCL_OK) { + encState = (Tcl_EncodingState) wide; + encStatePtr = &encState; } else if (Tcl_GetCharLength(objv[5]) == 0) { - encState = NULL; + encStatePtr = NULL; } else { return TCL_ERROR; } + if (Tcl_GetIntFromObj(interp, objv[6], &dstLen) != TCL_OK) { return TCL_ERROR; } @@ -2126,7 +2128,7 @@ static int UtfExtWrapper( "TCL_ENCODING_CHAR_LIMIT set in flags.", TCL_STATIC); return TCL_ERROR; } - if (Tcl_GetIntFromObj(interp, dstCharsVar, &dstChars) != TCL_OK) { + if (Tcl_GetIntFromObj(interp, valueObj, &dstChars) != TCL_OK) { return TCL_ERROR; } } else { @@ -2138,11 +2140,11 @@ static int UtfExtWrapper( memset(bufPtr, 0xFF, dstLen); /* Need to check nul terminator */ memmove(bufPtr + dstLen, "\xAB\xCD\xEF\xAB", 4); /* overflow detection */ bytes = Tcl_GetByteArrayFromObj(objv[3], &srcLen); /* Last! to avoid shimmering */ - result = (*transformer)(interp, encoding, (const char *) bytes, srcLen, flags, - &encState, (char *) bufPtr, dstLen, - srcReadVar ? &srcRead : NULL, - &dstWrote, - dstCharsVar ? &dstChars : NULL); + result = (*transformer)(interp, encoding, (const char *)bytes, srcLen, flags, + encStatePtr, (char *) bufPtr, dstLen, + srcReadVar ? &srcRead : NULL, + &dstWrote, + dstCharsVar ? &dstChars : NULL); if (memcmp(bufPtr + bufLen - 4, "\xAB\xCD\xEF\xAB", 4)) { Tcl_SetResult(interp, "Tcl_ExternalToUtf wrote past output buffer", @@ -2172,7 +2174,7 @@ static int UtfExtWrapper( } result = TCL_OK; resultObjs[1] = - encState ? Tcl_NewIntObj(encStateValue) : Tcl_NewObj(); + encStatePtr ? Tcl_NewWideIntObj((Tcl_WideInt)encState) : Tcl_NewObj(); resultObjs[2] = Tcl_NewByteArrayObj(bufPtr, dstLen); if (srcReadVar) { if (Tcl_ObjSetVar2(interp, diff --git a/tests/utfext.test b/tests/utfext.test index 175e3fa..b980800 100644 --- a/tests/utfext.test +++ b/tests/utfext.test @@ -88,6 +88,11 @@ test xx-bufferoverflow {buffer overflow Tcl_ExternalToUtf} -body { testencoding Tcl_UtfToExternal utf-16 A {start end} {} 1 } -result [list nospace {} \xFF] +# Another bug - char limit not obeyed +# % set cv 2 +# % testencoding Tcl_ExternalToUtf utf-8 abcdefgh {start end noterminate charlimit} {} 20 rv wv cv +# nospace {} abcÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿÿ + ::tcltest::cleanupTests return -- cgit v0.12 From aa14feed8b03a78ef20b0989d17b44b7e734243e Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Thu, 16 Mar 2023 18:15:46 +0000 Subject: Missed two tests. Blast it :-( --- tests/encoding.test | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/encoding.test b/tests/encoding.test index 1be6fed..35340a6 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -872,10 +872,10 @@ test encoding-24.37 {Parse invalid utf-8 with -profile tcl8} -body { } -result \uD800 test encoding-24.38.1 {Try to generate invalid utf-8} -body { encoding convertto -profile tcl8 utf-8 \uD800 -} -returnCodes 1 -result {unexpected character at index 0: 'U+00D800'} +} -result \xED\xA0\x80 test encoding-24.38.2 {Try to generate invalid utf-8} -body { encoding convertto -profile strict utf-8 \uD800 -} -result \xED\xA0\x80 +} -returnCodes 1 -result {unexpected character at index 0: 'U+00D800'} test encoding-24.39 {Try to generate invalid utf-8 with -profile strict} -body { encoding convertto -profile strict utf-8 \uD800 } -returnCodes 1 -result {unexpected character at index 0: 'U+00D800'} -- cgit v0.12 From 3129864cc27566ec6c62c86299d366812f9ce82c Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Thu, 16 Mar 2023 20:12:23 +0000 Subject: If TCL_UTF_MAX=4, don't set 'exact' to 1 --- generic/tcl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/generic/tcl.h b/generic/tcl.h index 7a8c8a8..82430ba 100644 --- a/generic/tcl.h +++ b/generic/tcl.h @@ -2425,7 +2425,7 @@ const char * TclTomMathInitializeStubs(Tcl_Interp *interp, # else # define Tcl_InitStubs(interp, version, exact) \ (Tcl_InitStubs)(interp, TCL_PATCH_LEVEL, \ - 1|(TCL_MAJOR_VERSION<<8)|(TCL_MINOR_VERSION<<16), \ + (exact)|(TCL_MAJOR_VERSION<<8)|(TCL_MINOR_VERSION<<16), \ TCL_STUB_MAGIC) # endif #else -- cgit v0.12 From 9c8a1292c0c8aba0cd2c718d12e953c86af6cd7d Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Fri, 17 Mar 2023 07:57:14 +0000 Subject: Don't introduce size_t in a header-file which didn't use it before. Make more clear that those are unsupported internal functions. --- generic/tcl.decls | 13 ++++++------- generic/tclDecls.h | 12 ++++++------ generic/tclStubInit.c | 14 ++++++++------ 3 files changed, 20 insertions(+), 19 deletions(-) diff --git a/generic/tcl.decls b/generic/tcl.decls index 7f734c6..b50f775 100644 --- a/generic/tcl.decls +++ b/generic/tcl.decls @@ -2326,24 +2326,23 @@ declare 630 { # ----- BASELINE -- FOR -- 8.6.0 ----- # -# TIP #481 (undocumented stub entries) +# (unsupported in Tcl 8.6) declare 651 { - char *TclGetStringFromObj_(Tcl_Obj *objPtr, size_t *lengthPtr) + char *TclGetStringFromObj_(Tcl_Obj *objPtr, void *lengthPtr) } declare 652 { - unsigned short *TclGetUnicodeFromObj_(Tcl_Obj *objPtr, size_t *lengthPtr) + unsigned short *TclGetUnicodeFromObj_(Tcl_Obj *objPtr, void *lengthPtr) } -# Only available in Tcl 8.x, NULL in Tcl 9.0 declare 653 { - unsigned char *TclGetByteArrayFromObj_(Tcl_Obj *objPtr, size_t *numBytesPtr) + unsigned char *TclGetByteArrayFromObj_(Tcl_Obj *objPtr, void *numBytesPtr) } +# ----- BASELINE -- FOR -- 8.7.0 / 9.0.0 ----- # + declare 687 { void TclUnusedStubEntry(void) } -# ----- BASELINE -- FOR -- 8.7.0 / 9.0.0 ----- # - ############################################################################## # Define the platform specific public Tcl interface. These functions are only diff --git a/generic/tclDecls.h b/generic/tclDecls.h index 551a5b6..078974c 100644 --- a/generic/tclDecls.h +++ b/generic/tclDecls.h @@ -1837,13 +1837,13 @@ EXTERN void Tcl_ZlibStreamSetCompressionDictionary( /* Slot 650 is reserved */ /* 651 */ EXTERN char * TclGetStringFromObj_(Tcl_Obj *objPtr, - size_t *lengthPtr); + void *lengthPtr); /* 652 */ EXTERN unsigned short * TclGetUnicodeFromObj_(Tcl_Obj *objPtr, - size_t *lengthPtr); + void *lengthPtr); /* 653 */ EXTERN unsigned char * TclGetByteArrayFromObj_(Tcl_Obj *objPtr, - size_t *numBytesPtr); + void *numBytesPtr); /* Slot 654 is reserved */ /* Slot 655 is reserved */ /* Slot 656 is reserved */ @@ -2565,9 +2565,9 @@ typedef struct TclStubs { void (*reserved648)(void); void (*reserved649)(void); void (*reserved650)(void); - char * (*tclGetStringFromObj_) (Tcl_Obj *objPtr, size_t *lengthPtr); /* 651 */ - unsigned short * (*tclGetUnicodeFromObj_) (Tcl_Obj *objPtr, size_t *lengthPtr); /* 652 */ - unsigned char * (*tclGetByteArrayFromObj_) (Tcl_Obj *objPtr, size_t *numBytesPtr); /* 653 */ + char * (*tclGetStringFromObj_) (Tcl_Obj *objPtr, void *lengthPtr); /* 651 */ + unsigned short * (*tclGetUnicodeFromObj_) (Tcl_Obj *objPtr, void *lengthPtr); /* 652 */ + unsigned char * (*tclGetByteArrayFromObj_) (Tcl_Obj *objPtr, void *numBytesPtr); /* 653 */ void (*reserved654)(void); void (*reserved655)(void); void (*reserved656)(void); diff --git a/generic/tclStubInit.c b/generic/tclStubInit.c index 565dd8c..ff3a099 100644 --- a/generic/tclStubInit.c +++ b/generic/tclStubInit.c @@ -139,15 +139,17 @@ static const char *TclGetStartupScriptFileName(void) return Tcl_GetString(path); } +# (unsupported in Tcl 8.6) + #define TclGetStringFromObj_ getStringFromObj static char * TclGetStringFromObj_( Tcl_Obj *objPtr, - size_t *lengthPtr) + void *lengthPtr) { int length; char *result = Tcl_GetStringFromObj(objPtr, &length); - *lengthPtr = (size_t)length; + *(size_t *)lengthPtr = (size_t)length; return result; } @@ -155,11 +157,11 @@ TclGetStringFromObj_( static unsigned short * TclGetUnicodeFromObj_( Tcl_Obj *objPtr, - size_t *lengthPtr) + void *lengthPtr) { int length; Tcl_UniChar *result = Tcl_GetUnicodeFromObj(objPtr, &length); - *lengthPtr = (size_t)length; + *(size_t *)lengthPtr = (size_t)length; return result; } @@ -167,11 +169,11 @@ TclGetUnicodeFromObj_( static unsigned char * TclGetByteArrayFromObj_( Tcl_Obj *objPtr, - size_t *numBytesPtr) + void *numBytesPtr) { int numBytes; unsigned char *result = Tcl_GetByteArrayFromObj(objPtr, &numBytes); - *numBytesPtr = (size_t)numBytes; + *(size_t *)numBytesPtr = (size_t)numBytes; return result; } -- cgit v0.12 From 4aa63fcc254f450717aba4c135f87fedcfdd38cc Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Fri, 17 Mar 2023 08:24:59 +0000 Subject: Don't return from a void function --- generic/tclStubInit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/generic/tclStubInit.c b/generic/tclStubInit.c index ff3a099..1ef7f17 100644 --- a/generic/tclStubInit.c +++ b/generic/tclStubInit.c @@ -187,7 +187,7 @@ static unsigned short TclWinNToHS(unsigned short ns) { #define TclWinConvertError_ winConvertError static void TclWinConvertError_(unsigned errCode) { - return TclWinConvertError(errCode); + TclWinConvertError(errCode); } #endif -- cgit v0.12 From 87b64566847ce5fda7292ec8b2d2de3739e7e680 Mon Sep 17 00:00:00 2001 From: dgp Date: Fri, 17 Mar 2023 12:35:51 +0000 Subject: unbreak the build --- generic/tclStubInit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/generic/tclStubInit.c b/generic/tclStubInit.c index 1ef7f17..c504586 100644 --- a/generic/tclStubInit.c +++ b/generic/tclStubInit.c @@ -139,7 +139,7 @@ static const char *TclGetStartupScriptFileName(void) return Tcl_GetString(path); } -# (unsupported in Tcl 8.6) +/* (unsupported in Tcl 8.6) */ #define TclGetStringFromObj_ getStringFromObj static char * -- cgit v0.12 From 13b1529c7b6f4da55532170ee08ac047581b6300 Mon Sep 17 00:00:00 2001 From: dgp Date: Fri, 17 Mar 2023 14:22:44 +0000 Subject: Clean up after events to avoid errors in later tests that use an event loop. --- tests/ioTrans.test | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/ioTrans.test b/tests/ioTrans.test index 3a23e61..44e7d64 100644 --- a/tests/ioTrans.test +++ b/tests/ioTrans.test @@ -643,6 +643,9 @@ namespace eval reflector { proc finalize {_ chan} { + foreach id [after info] { + after cancel $id + } namespace delete $_ } -- cgit v0.12 From e9ccf557eb23f66c28210e967e344a61fef2ed58 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Fri, 17 Mar 2023 16:12:23 +0000 Subject: Fix [6390566ecd]: Testcase cmdAH-4.3.13.00DC0000.tail.utf-32.tcl8.a fails sometimes --- generic/tclEncoding.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index f15b479..5a89644 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -2635,9 +2635,12 @@ Utf32ToUtfProc( * unsigned short-size data. */ - if ((ch > 0) && (ch < 0x80)) { + if ((unsigned)ch - 1 < 0x7F) { *dst++ = (ch & 0xFF); } else { + if (((prev & ~0x3FF) != 0xD800) && ((ch & ~0x3FF) == 0xDC00)) { + *dst = 0; /* In case of lower surrogate, don't try to combine */ + } dst += Tcl_UniCharToUtf(ch, dst); } src += sizeof(unsigned int); @@ -2856,7 +2859,7 @@ Utf16ToUtfProc( } if (((prev & ~0x3FF) == 0xD800) && ((ch & ~0x3FF) != 0xDC00)) { if (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT)) { - result = TCL_CONVERT_UNKNOWN; + result = TCL_CONVERT_SYNTAX; src -= 2; /* Go back to beginning of high surrogate */ dst--; /* Also undo writing a single byte too much */ numChars--; @@ -2877,7 +2880,7 @@ Utf16ToUtfProc( dst += Tcl_UniCharToUtf(ch, dst); } else if (((ch & ~0x3FF) == 0xDC00) && ((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT)) { /* Lo surrogate not preceded by Hi surrogate */ - result = TCL_CONVERT_UNKNOWN; + result = TCL_CONVERT_SYNTAX; break; } else { *dst = 0; /* In case of lower surrogate, don't try to combine */ @@ -2888,7 +2891,7 @@ Utf16ToUtfProc( if ((ch & ~0x3FF) == 0xD800) { if ((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT) { - result = TCL_CONVERT_UNKNOWN; + result = TCL_CONVERT_SYNTAX; src -= 2; dst--; numChars--; -- cgit v0.12 From 453c27a88e9da3cb50fefe2c4a5fb7a7d09b8afc Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Fri, 17 Mar 2023 20:04:48 +0000 Subject: arm64e -> arm64, since arm64e is not available yet on MacOS (Thanks to Stefan Sobernig) --- unix/configure | 24 ++++++++++++------------ unix/tcl.m4 | 16 ++++++++-------- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/unix/configure b/unix/configure index 2ebb2ea..16210e6 100755 --- a/unix/configure +++ b/unix/configure @@ -7669,15 +7669,15 @@ echo "${ECHO_T}$tcl_cv_cc_arch_x86_64" >&6 fi ;; - arm64|arm64e) - echo "$as_me:$LINENO: checking if compiler accepts -arch arm64e flag" >&5 -echo $ECHO_N "checking if compiler accepts -arch arm64e flag... $ECHO_C" >&6 -if test "${tcl_cv_cc_arch_arm64e+set}" = set; then + arm64) + echo "$as_me:$LINENO: checking if compiler accepts -arch arm64 flag" >&5 +echo $ECHO_N "checking if compiler accepts -arch arm64 flag... $ECHO_C" >&6 +if test "${tcl_cv_cc_arch_arm64+set}" = set; then echo $ECHO_N "(cached) $ECHO_C" >&6 else hold_cflags=$CFLAGS - CFLAGS="$CFLAGS -arch arm64e" + CFLAGS="$CFLAGS -arch arm64" cat >conftest.$ac_ext <<_ACEOF /* confdefs.h. */ _ACEOF @@ -7715,22 +7715,22 @@ if { (eval echo "$as_me:$LINENO: \"$ac_link\"") >&5 ac_status=$? echo "$as_me:$LINENO: \$? = $ac_status" >&5 (exit $ac_status); }; }; then - tcl_cv_cc_arch_arm64e=yes + tcl_cv_cc_arch_arm64=yes else echo "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 -tcl_cv_cc_arch_arm64e=no +tcl_cv_cc_arch_arm64=no fi rm -f conftest.err conftest.$ac_objext \ conftest$ac_exeext conftest.$ac_ext CFLAGS=$hold_cflags fi -echo "$as_me:$LINENO: result: $tcl_cv_cc_arch_arm64e" >&5 -echo "${ECHO_T}$tcl_cv_cc_arch_arm64e" >&6 - if test $tcl_cv_cc_arch_arm64e = yes; then +echo "$as_me:$LINENO: result: $tcl_cv_cc_arch_arm64" >&5 +echo "${ECHO_T}$tcl_cv_cc_arch_arm64" >&6 + if test $tcl_cv_cc_arch_arm64 = yes; then - CFLAGS="$CFLAGS -arch arm64e" + CFLAGS="$CFLAGS -arch arm64" do64bit_ok=yes fi @@ -7743,7 +7743,7 @@ echo "$as_me: WARNING: Don't know how enable 64-bit on architecture \`arch\`" >& else # Check for combined 32-bit and 64-bit fat build - if echo "$CFLAGS " |grep -E -q -- '-arch (ppc64|x86_64|arm64e) ' \ + if echo "$CFLAGS " |grep -E -q -- '-arch (ppc64|x86_64|arm64) ' \ && echo "$CFLAGS " |grep -E -q -- '-arch (ppc|i386) '; then fat_32_64=yes diff --git a/unix/tcl.m4 b/unix/tcl.m4 index d9d0a71..0ef9f3d 100644 --- a/unix/tcl.m4 +++ b/unix/tcl.m4 @@ -1541,16 +1541,16 @@ AC_DEFUN([SC_CONFIG_CFLAGS], [ CFLAGS="$CFLAGS -arch x86_64" do64bit_ok=yes ]);; - arm64|arm64e) - AC_CACHE_CHECK([if compiler accepts -arch arm64e flag], - tcl_cv_cc_arch_arm64e, [ + arm64) + AC_CACHE_CHECK([if compiler accepts -arch arm64 flag], + tcl_cv_cc_arch_arm64, [ hold_cflags=$CFLAGS - CFLAGS="$CFLAGS -arch arm64e" + CFLAGS="$CFLAGS -arch arm64" AC_LINK_IFELSE([AC_LANG_PROGRAM([[]], [[]])], - [tcl_cv_cc_arch_arm64e=yes],[tcl_cv_cc_arch_arm64e=no]) + [tcl_cv_cc_arch_arm64=yes],[tcl_cv_cc_arch_arm64=no]) CFLAGS=$hold_cflags]) - AS_IF([test $tcl_cv_cc_arch_arm64e = yes], [ - CFLAGS="$CFLAGS -arch arm64e" + AS_IF([test $tcl_cv_cc_arch_arm64 = yes], [ + CFLAGS="$CFLAGS -arch arm64" do64bit_ok=yes ]);; *) @@ -1558,7 +1558,7 @@ AC_DEFUN([SC_CONFIG_CFLAGS], [ esac ], [ # Check for combined 32-bit and 64-bit fat build - AS_IF([echo "$CFLAGS " |grep -E -q -- '-arch (ppc64|x86_64|arm64e) ' \ + AS_IF([echo "$CFLAGS " |grep -E -q -- '-arch (ppc64|x86_64|arm64) ' \ && echo "$CFLAGS " |grep -E -q -- '-arch (ppc|i386) '], [ fat_32_64=yes]) ]) -- cgit v0.12 From bdad96ab6988802901289f1b4d1f366a2002f023 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Fri, 17 Mar 2023 20:10:29 +0000 Subject: Few more arm64e -> arm64 --- macosx/Tcl.xcodeproj/project.pbxproj | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/macosx/Tcl.xcodeproj/project.pbxproj b/macosx/Tcl.xcodeproj/project.pbxproj index 4143128..68b9418 100644 --- a/macosx/Tcl.xcodeproj/project.pbxproj +++ b/macosx/Tcl.xcodeproj/project.pbxproj @@ -2132,7 +2132,7 @@ baseConfigurationReference = F97AE82B0B65C69B00310EA2 /* Tcl-Release.xcconfig */; buildSettings = { ARCHS = "$(ARCHS_STANDARD_64_BIT)"; - CFLAGS = "-arch x86_64 -arch arm64e $(CFLAGS)"; + CFLAGS = "-arch x86_64 -arch arm64 $(CFLAGS)"; MACOSX_DEPLOYMENT_TARGET = 10.6; PREBINDING = NO; }; @@ -2517,7 +2517,7 @@ baseConfigurationReference = F97AE82B0B65C69B00310EA2 /* Tcl-Release.xcconfig */; buildSettings = { ARCHS = "$(ARCHS_STANDARD_64_BIT)"; - CFLAGS = "-arch x86_64 -arch arm64e $(CFLAGS)"; + CFLAGS = "-arch x86_64 -arch arm64 $(CFLAGS)"; GCC_VERSION = 4.0; MACOSX_DEPLOYMENT_TARGET = 10.6; PREBINDING = NO; @@ -2555,7 +2555,7 @@ baseConfigurationReference = F97AE82B0B65C69B00310EA2 /* Tcl-Release.xcconfig */; buildSettings = { ARCHS = "$(ARCHS_STANDARD_64_BIT)"; - CFLAGS = "-arch x86_64 -arch arm64e $(CFLAGS)"; + CFLAGS = "-arch x86_64 -arch arm64 $(CFLAGS)"; DEBUG_INFORMATION_FORMAT = dwarf; GCC = "llvm-gcc"; GCC_OPTIMIZATION_LEVEL = 4; @@ -2695,7 +2695,7 @@ ARCHS = ( "$(NATIVE_ARCH_64_BIT)", ); - CFLAGS = "-arch x86_64 -arch arm64e $(CFLAGS)"; + CFLAGS = "-arch x86_64 -arch arm64 $(CFLAGS)"; DEBUG_INFORMATION_FORMAT = dwarf; GCC = clang; GCC_OPTIMIZATION_LEVEL = 4; @@ -2762,7 +2762,7 @@ baseConfigurationReference = F97AE82B0B65C69B00310EA2 /* Tcl-Release.xcconfig */; buildSettings = { ARCHS = "$(ARCHS_STANDARD_64_BIT)"; - CFLAGS = "-arch x86_64 -arch arm64e $(CFLAGS)"; + CFLAGS = "-arch x86_64 -arch arm64 $(CFLAGS)"; CPPFLAGS = "-isysroot $(SDKROOT) $(CPPFLAGS)"; MACOSX_DEPLOYMENT_TARGET = 10.5; PREBINDING = NO; -- cgit v0.12 From 5fbaf44ecf9e3c77c88088e83c53b51cd8af05db Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Sat, 18 Mar 2023 08:42:39 +0000 Subject: Manpage fixes --- doc/Encoding.3 | 10 ++++------ doc/encoding.n | 2 +- doc/fconfigure.n | 6 ++++-- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/doc/Encoding.3 b/doc/Encoding.3 index 76ea193..7453549 100644 --- a/doc/Encoding.3 +++ b/doc/Encoding.3 @@ -8,7 +8,7 @@ .so man.macros .BS .SH NAME -Tcl_GetEncoding, Tcl_FreeEncoding, Tcl_GetEncodingFromObj, Tcl_ExternalToUtfDString, Tcl_UtfToExternalDStringEx, Tcl_ExternalToUtf, Tcl_UtfToExternalDString, Tcl_UtfToExternalDStringEx, Tcl_UtfToExternal, Tcl_GetEncodingName, Tcl_SetSystemEncoding, Tcl_GetEncodingNameFromEnvironment, Tcl_GetEncodingNames, Tcl_CreateEncoding, Tcl_GetEncodingSearchPath, Tcl_SetEncodingSearchPath, Tcl_GetDefaultEncodingDir, Tcl_SetDefaultEncodingDir \- procedures for creating and using encodings +Tcl_GetEncoding, Tcl_FreeEncoding, Tcl_GetEncodingFromObj, Tcl_ExternalToUtfDString, Tcl_ExternalToUtfDStringEx, Tcl_ExternalToUtf, Tcl_UtfToExternalDString, Tcl_UtfToExternalDStringEx, Tcl_UtfToExternal, Tcl_GetEncodingName, Tcl_SetSystemEncoding, Tcl_GetEncodingNameFromEnvironment, Tcl_GetEncodingNames, Tcl_CreateEncoding, Tcl_GetEncodingSearchPath, Tcl_SetEncodingSearchPath, Tcl_GetDefaultEncodingDir, Tcl_SetDefaultEncodingDir \- procedures for creating and using encodings .SH SYNOPSIS .nf \fB#include \fR @@ -223,7 +223,6 @@ call \fBTcl_DStringFree\fR to free any information stored in \fIdstPtr\fR. When converting, if any of the characters in the source buffer cannot be represented in the target encoding, a default fallback character will be used. The return value is a pointer to the value stored in the DString. - .PP \fBTcl_ExternalToUtfDStringEx\fR is a more flexible version of older \fBTcl_ExternalToUtfDString\fR function. It takes three additional parameters, @@ -235,7 +234,7 @@ into the function. On success, the function returns \fBTCL_ERROR\fR with the converted string stored in \fB*dstPtr\fR. For errors other than conversion errors, such as invalid flags, the function returns \fBTCL_OK\fR with an error message in \fBinterp\fR if it is not NULL. - +.PP For conversion errors, \fBTcl_ExternalToUtfDStringEx\fR returns one of the \fBTCL_CONVERT_*\fR errors listed below for \fBTcl_ExternalToUtf\fR. When one of these conversion errors is returned, an error message is @@ -243,10 +242,9 @@ stored in \fBinterp\fR only if \fBerrorIdxPtr\fR is NULL. Otherwise, no error me is stored as the function expects the caller is interested whatever is decoded to that point and not treating this as an immediate error condition. The index of the error location is stored in \fB*errorIdxPtr\fR. - +.PP The caller must call \fBTcl_DStringFree\fR to free up the \fB*dstPtr\fR resources irrespective of the return value from the function. - .PP \fBTcl_ExternalToUtf\fR converts a source buffer \fIsrc\fR from the specified \fIencoding\fR into UTF-8. Up to \fIsrcLen\fR bytes are converted from the @@ -626,7 +624,7 @@ with at most one of \fBTCL_ENCODING_PROFILE_TCL8\fR, These correspond to the \fBtcl8\fR, \fBstrict\fR and \fBreplace\fR profiles respectively. If none are specified, a version-dependent default profile is used. For Tcl 8.7, the default profile is \fBtcl8\fR. - +.PP For details about profiles, see the \fBPROFILES\fR section in the documentation of the \fBencoding\fR command. .SH "SEE ALSO" diff --git a/doc/encoding.n b/doc/encoding.n index 7266311..8ede974 100644 --- a/doc/encoding.n +++ b/doc/encoding.n @@ -85,8 +85,8 @@ The encodings and .QW iso8859-1 are guaranteed to be present in the list. -.TP .VS "TCL8.7 TIP656" +.TP \fBencoding profiles\fR Returns a list of the names of encoding profiles. See \fBPROFILES\fR below. .VE "TCL8.7 TIP656" diff --git a/doc/fconfigure.n b/doc/fconfigure.n index 526c5ad..3de22eb 100644 --- a/doc/fconfigure.n +++ b/doc/fconfigure.n @@ -101,6 +101,8 @@ locale-dependent system encoding used for interfacing with the operating system, as returned by \fBencoding system\fR. .RE .TP +\fB\-eofchar\fR \fIchar\fR +.TP \fB\-eofchar\fR \fB{\fIchar outChar\fB}\fR . This option supports DOS file systems that use Control-z (\ex1A) as an @@ -111,8 +113,8 @@ If \fIchar\fR is the empty string, then there is no special end of file character marker. For read-write channels, a two-element list specifies the end of file marker for input and output, respectively. As a convenience, when setting the end-of-file character for a read-write -channel you can specify a single value that will apply to both reading -and writing. When querying the end-of-file character of a read-write +channel you can specify a single value that will apply to reading +only. When querying the end-of-file character of a read-write channel, a two-element list will always be returned. The default value for \fB\-eofchar\fR is the empty string in all cases except for files under Windows. In that case the \fB\-eofchar\fR is Control-z (\ex1A) for -- cgit v0.12 From 278b807336757abd553f464b172b6d751f92b3c9 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Sat, 18 Mar 2023 08:50:34 +0000 Subject: Make "tcltest" package use "-profile tcl8" internally, irrespective of what the default profile is --- library/tcltest/tcltest.tcl | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/library/tcltest/tcltest.tcl b/library/tcltest/tcltest.tcl index dbe1eae..278a4e0 100644 --- a/library/tcltest/tcltest.tcl +++ b/library/tcltest/tcltest.tcl @@ -400,7 +400,7 @@ namespace eval tcltest { default { set outputChannel [open $filename a] if {[package vsatisfies [package provide Tcl] 8.7-]} { - fconfigure $outputChannel -encoding utf-8 + fconfigure $outputChannel -profile tcl8 -encoding utf-8 } set ChannelsWeOpened($outputChannel) 1 @@ -447,7 +447,7 @@ namespace eval tcltest { default { set errorChannel [open $filename a] if {[package vsatisfies [package provide Tcl] 8.7-]} { - fconfigure $errorChannel -encoding utf-8 + fconfigure $errorChannel -profile tcl8 -encoding utf-8 } set ChannelsWeOpened($errorChannel) 1 @@ -792,7 +792,7 @@ namespace eval tcltest { if {$Option(-loadfile) eq {}} {return} set tmp [open $Option(-loadfile) r] if {[package vsatisfies [package provide Tcl] 8.7-]} { - fconfigure $tmp -encoding utf-8 + fconfigure $tmp -profile tcl8 -encoding utf-8 } loadScript [read $tmp] close $tmp @@ -1372,7 +1372,7 @@ proc tcltest::DefineConstraintInitializers {} { set code 0 if {![catch {set f [open "|[list [interpreter]]" w]}]} { if {[package vsatisfies [package provide Tcl] 8.7-]} { - fconfigure $f -encoding utf-8 + fconfigure $f -profile tcl8 -encoding utf-8 } if {![catch {puts $f exit}]} { if {![catch {close $f}]} { @@ -2222,7 +2222,7 @@ proc tcltest::test {name description args} { if {[file readable $testFile]} { set testFd [open $testFile r] if {[package vsatisfies [package provide Tcl] 8.7-]} { - fconfigure $testFd -encoding utf-8 + fconfigure $testFd -profile tcl8 -encoding utf-8 } set testLine [expr {[lsearch -regexp \ [split [read $testFd] "\n"] \ @@ -2253,7 +2253,11 @@ proc tcltest::test {name description args} { if {$scriptCompare} { puts [outputChannel] "---- Error testing result: $scriptMatch" } else { - puts [outputChannel] "---- Result was:\n[Asciify $actualAnswer]" + if {[catch { + puts [outputChannel] "---- Result was:\n[Asciify $actualAnswer]" + } errMsg]} { + puts [outputChannel] "\n---- Result was:\n" + } puts [outputChannel] "---- Result should have been\ ($match matching):\n[Asciify $result]" } @@ -2933,7 +2937,7 @@ proc tcltest::runAllTests { {shell ""} } { incr numTestFiles set pipeFd [open $cmd "r"] if {[package vsatisfies [package provide Tcl] 8.7-]} { - fconfigure $pipeFd -encoding utf-8 + fconfigure $pipeFd -profile tcl8 -encoding utf-8 } while {[gets $pipeFd line] >= 0} { if {[regexp [join { @@ -3133,7 +3137,7 @@ proc tcltest::makeFile {contents name {directory ""}} { set fd [open $fullName w] fconfigure $fd -translation lf if {[package vsatisfies [package provide Tcl] 8.7-]} { - fconfigure $fd -encoding utf-8 + fconfigure $fd -profile tcl8 -encoding utf-8 } if {[string index $contents end] eq "\n"} { puts -nonewline $fd $contents @@ -3284,7 +3288,7 @@ proc tcltest::viewFile {name {directory ""}} { set fullName [file join $directory $name] set f [open $fullName] if {[package vsatisfies [package provide Tcl] 8.7-]} { - fconfigure $f -encoding utf-8 + fconfigure $f -profile tcl8 -encoding utf-8 } set data [read -nonewline $f] close $f -- cgit v0.12 From 5e2a39d6265fa8f5f2931a5823910a6c6cc002ae Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Sat, 18 Mar 2023 09:02:15 +0000 Subject: Make http package use "-profile tcl8", irrespective of the default profile in Tcl, until decided differently --- library/http/http.tcl | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/library/http/http.tcl b/library/http/http.tcl index 1f476f3..4ef6c73 100644 --- a/library/http/http.tcl +++ b/library/http/http.tcl @@ -1746,6 +1746,9 @@ proc http::OpenSocket {token DoLater} { } fconfigure $sock -translation {auto crlf} \ -buffersize $state(-blocksize) + if {[package vsatisfies [package provide Tcl] 9.0-]} { + fconfigure $sock -profile tcl8 \ + } ##Log socket opened, DONE fconfigure - token $token } @@ -2164,6 +2167,9 @@ proc http::Connected {token proto phost srvurl} { lassign [fconfigure $sock -translation] trRead trWrite fconfigure $sock -translation [list $trRead crlf] \ -buffersize $state(-blocksize) + if {[package vsatisfies [package provide Tcl] 9.0-]} { + fconfigure $sock -profile tcl8 \ + } # The following is disallowed in safe interpreters, but the socket is # already in non-blocking mode in that case. @@ -2554,6 +2560,9 @@ proc http::ReceiveResponse {token} { lassign [fconfigure $sock -translation] trRead trWrite fconfigure $sock -translation [list auto $trWrite] \ -buffersize $state(-blocksize) + if {[package vsatisfies [package provide Tcl] 9.0-]} { + fconfigure $sock -profile tcl8 \ + } Log ^D$tk begin receiving response - token $token coroutine ${token}--EventCoroutine http::Event $sock $token @@ -4545,7 +4554,11 @@ proc http::Eot {token {reason {}}} { set enc [CharsetToEncoding $state(charset)] if {$enc ne "binary"} { - set state(body) [encoding convertfrom $enc $state(body)] + if {[package vsatisfies [package provide Tcl] 9.0-]} { + set state(body) [encoding convertfrom -profile tcl8 $enc $state(body)] + } else { + set state(body) [encoding convertfrom $enc $state(body)] + } } # Translate text line endings. @@ -4628,7 +4641,11 @@ proc http::GuessType {token} { if {$enc eq "binary"} { return 0 } - set state(body) [encoding convertfrom $enc $state(body)] + if {[package vsatisfies [package provide Tcl] 9.0-]} { + set state(body) [encoding convertfrom -profile tcl8 $enc $state(body)] + } else { + set state(body) [encoding convertfrom -profile tcl8 $enc $state(body)] + } set state(body) [string map {\r\n \n \r \n} $state(body)] set state(type) application/xml set state(binary) 0 @@ -4709,7 +4726,11 @@ proc http::quoteString {string} { # a pre-computed map and [string map] to do the conversion (much faster # than [regsub]/[subst]). [Bug 1020491] - set string [encoding convertto $http(-urlencoding) $string] + if {[package vsatisfies [package provide Tcl] 9.0-]} { + set string [encoding convertto -profile tcl8 $http(-urlencoding) $string] + } else { + set string [encoding convertto $http(-urlencoding) $string] + } return [string map $formMap $string] } -- cgit v0.12 From 7834acd2e42f731cb81a37176d8c8cbc371e43f0 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Sat, 18 Mar 2023 09:07:20 +0000 Subject: one too much "-profile tcl8" --- library/http/http.tcl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/library/http/http.tcl b/library/http/http.tcl index 4ef6c73..c0f6e5d 100644 --- a/library/http/http.tcl +++ b/library/http/http.tcl @@ -4644,7 +4644,7 @@ proc http::GuessType {token} { if {[package vsatisfies [package provide Tcl] 9.0-]} { set state(body) [encoding convertfrom -profile tcl8 $enc $state(body)] } else { - set state(body) [encoding convertfrom -profile tcl8 $enc $state(body)] + set state(body) [encoding convertfrom $enc $state(body)] } set state(body) [string map {\r\n \n \r \n} $state(body)] set state(type) application/xml -- cgit v0.12 From 291ff6db1bd984a81e33e093fce433f8a4967f33 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Sun, 19 Mar 2023 11:44:38 +0000 Subject: Remove unneeded backslash --- library/http/http.tcl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/library/http/http.tcl b/library/http/http.tcl index c0f6e5d..79f876a 100644 --- a/library/http/http.tcl +++ b/library/http/http.tcl @@ -1747,7 +1747,7 @@ proc http::OpenSocket {token DoLater} { fconfigure $sock -translation {auto crlf} \ -buffersize $state(-blocksize) if {[package vsatisfies [package provide Tcl] 9.0-]} { - fconfigure $sock -profile tcl8 \ + fconfigure $sock -profile tcl8 } ##Log socket opened, DONE fconfigure - token $token } @@ -2168,7 +2168,7 @@ proc http::Connected {token proto phost srvurl} { fconfigure $sock -translation [list $trRead crlf] \ -buffersize $state(-blocksize) if {[package vsatisfies [package provide Tcl] 9.0-]} { - fconfigure $sock -profile tcl8 \ + fconfigure $sock -profile tcl8 } # The following is disallowed in safe interpreters, but the socket is @@ -2561,7 +2561,7 @@ proc http::ReceiveResponse {token} { fconfigure $sock -translation [list auto $trWrite] \ -buffersize $state(-blocksize) if {[package vsatisfies [package provide Tcl] 9.0-]} { - fconfigure $sock -profile tcl8 \ + fconfigure $sock -profile tcl8 } Log ^D$tk begin receiving response - token $token -- cgit v0.12 From 800d78f04d79def339bde5edb9042b6288524460 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Sun, 19 Mar 2023 11:56:58 +0000 Subject: Don't let httpd11 depend on the system encoding any more: All text files are now stored in utf-8. --- tests/httpd11.tcl | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/httpd11.tcl b/tests/httpd11.tcl index b605005..e97f403 100644 --- a/tests/httpd11.tcl +++ b/tests/httpd11.tcl @@ -150,7 +150,11 @@ proc Service {chan addr port} { if {[file exists $path] && [file isfile $path]} { foreach {what type} [mime-type $path] break set f [open $path r] - if {$what eq "binary"} {chan configure $f -translation binary} + if {$what eq "binary"} { + chan configure $f -translation binary} + } else { + chan configure $f -encoding utf-8} + } set data [read $f] close $f set code "200 OK" -- cgit v0.12 From 449998d3589b262b87b42a2ad586dd3c70b4b9e2 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Sun, 19 Mar 2023 13:23:03 +0000 Subject: Proposed fix for [5ae5ffc3f4]: Problem with -failindex on 32-bit platform. This also fixes a memory-leak. --- generic/tclCmdAH.c | 17 +++++++++++------ generic/tclInt.h | 9 ++++++++- 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/generic/tclCmdAH.c b/generic/tclCmdAH.c index 7fab2f0..dff231d 100644 --- a/generic/tclCmdAH.c +++ b/generic/tclCmdAH.c @@ -13,6 +13,7 @@ #include "tclInt.h" #include "tclIO.h" +#include "tclTomMath.h" #ifdef _WIN32 # include "tclWinInt.h" #endif @@ -580,13 +581,15 @@ EncodingConvertfromObjCmd( * data as was converted. */ if (failVarObj) { - /* I hope, wide int will cover Tcl_Size data type */ + Tcl_Obj *failIndex; + TclNewIndexObj(failIndex, errorLocation); if (Tcl_ObjSetVar2(interp, failVarObj, NULL, - Tcl_NewWideIntObj(errorLocation), + failIndex, TCL_LEAVE_ERR_MSG) == NULL) { - Tcl_DStringFree(&ds); + Tcl_DecrRefCount(failIndex); + Tcl_DStringFree(&ds); return TCL_ERROR; } } @@ -676,13 +679,15 @@ EncodingConverttoObjCmd( * data as was converted. */ if (failVarObj) { - /* I hope, wide int will cover Tcl_Size data type */ + Tcl_Obj *failIndex; + TclNewIndexObj(failIndex, errorLocation); if (Tcl_ObjSetVar2(interp, failVarObj, NULL, - Tcl_NewWideIntObj(errorLocation), + failIndex, TCL_LEAVE_ERR_MSG) == NULL) { - Tcl_DStringFree(&ds); + Tcl_DecrRefCount(failIndex); + Tcl_DStringFree(&ds); return TCL_ERROR; } } diff --git a/generic/tclInt.h b/generic/tclInt.h index a90ac79..aa9247f 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -4912,7 +4912,14 @@ MODULE_SCOPE Tcl_LibraryInitProc Procbodytest_SafeInit; } while (0) #define TclNewIndexObj(objPtr, w) \ - (objPtr) = (((Tcl_WideUInt)w) >= TCL_INDEX_NONE) ? Tcl_NewWideIntObj(-1) : Tcl_NewWideIntObj(w) + do { + Tcl_WideUInt _uw = (Tcl_WideUInt)w; + if (_uw >= TCL_INDEX_NONE) { + TclNewIntObj(objPtr, -1); + } else { + TclNewUIntObj(objPtr, w); + } + } while (0) #define TclNewDoubleObj(objPtr, d) \ (objPtr) = Tcl_NewDoubleObj(d) -- cgit v0.12 From 78c295898e25376830b59e74621cbcf9f118c495 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Sun, 19 Mar 2023 13:32:46 +0000 Subject: Doesn't look like a mem-leak: It appears that Tcl_ObjSetVar2() already handles that. --- generic/tclCmdAH.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/generic/tclCmdAH.c b/generic/tclCmdAH.c index dff231d..5dbadb8 100644 --- a/generic/tclCmdAH.c +++ b/generic/tclCmdAH.c @@ -588,7 +588,6 @@ EncodingConvertfromObjCmd( NULL, failIndex, TCL_LEAVE_ERR_MSG) == NULL) { - Tcl_DecrRefCount(failIndex); Tcl_DStringFree(&ds); return TCL_ERROR; } @@ -686,7 +685,6 @@ EncodingConverttoObjCmd( NULL, failIndex, TCL_LEAVE_ERR_MSG) == NULL) { - Tcl_DecrRefCount(failIndex); Tcl_DStringFree(&ds); return TCL_ERROR; } -- cgit v0.12 From 1876fe215dfd8f18fcacb7967ede6fe9221bdf29 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Sun, 19 Mar 2023 13:34:04 +0000 Subject: Another fix: don't access (w) twice --- generic/tclInt.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/generic/tclInt.h b/generic/tclInt.h index aa9247f..6aa05a8 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -4917,7 +4917,7 @@ MODULE_SCOPE Tcl_LibraryInitProc Procbodytest_SafeInit; if (_uw >= TCL_INDEX_NONE) { TclNewIntObj(objPtr, -1); } else { - TclNewUIntObj(objPtr, w); + TclNewUIntObj(objPtr, _uw); } } while (0) -- cgit v0.12 From 2d9a47cff10b0ed3a76254dbeb03b5ec987170f4 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Sun, 19 Mar 2023 21:51:50 +0000 Subject: Possible fix for [d7fd37ebd9]: handling leftover prefix in table encoding --- generic/tclEncoding.c | 30 +++++++++++++++++------------- tests/chanio.test | 2 +- 2 files changed, 18 insertions(+), 14 deletions(-) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 0478519..69b7b6c 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -3413,18 +3413,22 @@ TableToUtfProc( if (prefixBytes[byte]) { src++; if (src >= srcEnd) { - /* - * TODO - this is broken. For consistency with other - * decoders, an error should be raised only if strict. - * However, doing that check cause a whole bunch of test - * failures. Need to verify if those tests are in fact - * correct. - */ - src--; - result = TCL_CONVERT_MULTIBYTE; - break; + if (!(flags & TCL_ENCODING_END)) { + src--; + result = TCL_CONVERT_MULTIBYTE; + break; + } else if (PROFILE_STRICT(flags)) { + src--; + result = TCL_CONVERT_SYNTAX; + break; + } else if (PROFILE_REPLACE(flags)) { + ch = UNICODE_REPLACE_CHAR; + } else { + ch = (Tcl_UniChar)byte; + } + } else { + ch = toUnicode[byte][*((unsigned char *)src)]; } - ch = toUnicode[byte][*((unsigned char *)src)]; } else { ch = pageZero[byte]; } @@ -3447,7 +3451,7 @@ TableToUtfProc( * Special case for 1-byte utf chars for speed. */ - if (ch && ch < 0x80) { + if ((unsigned)ch - 1 < 0x7F) { *dst++ = (char) ch; } else { dst += Tcl_UniCharToUtf(ch, dst); @@ -3648,7 +3652,7 @@ Iso88591ToUtfProc( * Special case for 1-byte utf chars for speed. */ - if (ch && ch < 0x80) { + if ((unsigned)ch - 1 < 0x7F) { *dst++ = (char) ch; } else { dst += Tcl_UniCharToUtf(ch, dst); diff --git a/tests/chanio.test b/tests/chanio.test index d2008e6..7221141 100644 --- a/tests/chanio.test +++ b/tests/chanio.test @@ -1104,7 +1104,7 @@ test chan-io-7.3 {FilterInputBytes: split up character at EOF} -setup { lappend x [chan gets $f line] $line } -cleanup { chan close $f -} -result [list 15 "123456789012301" 18 0 1 -1 ""] +} -result [list 16 "123456789012301\x82" 18 0 1 -1 ""] test chan-io-7.4 {FilterInputBytes: recover from split up character} -setup { variable x "" } -constraints {stdio fileevent} -body { -- cgit v0.12 From 75664c655d15e9308cf62fcdaee3bed1c4545c63 Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Mon, 20 Mar 2023 02:36:36 +0000 Subject: Fix http11 test hang caused by trailing brace in previous commit --- tests/httpd11.tcl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/httpd11.tcl b/tests/httpd11.tcl index e97f403..9e0edcd 100644 --- a/tests/httpd11.tcl +++ b/tests/httpd11.tcl @@ -151,9 +151,9 @@ proc Service {chan addr port} { foreach {what type} [mime-type $path] break set f [open $path r] if {$what eq "binary"} { - chan configure $f -translation binary} + chan configure $f -translation binary } else { - chan configure $f -encoding utf-8} + chan configure $f -encoding utf-8 } set data [read $f] close $f -- cgit v0.12 From b273a9c1d4c036fce56101f8723037d491d6618f Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Mon, 20 Mar 2023 11:40:25 +0000 Subject: Use TclNewIndexObj() in stead of Tcl_NewWideIntObj(), which - actually - does the same but better for debugging. --- generic/tclCmdAH.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/generic/tclCmdAH.c b/generic/tclCmdAH.c index ff0d00f..6c46c8e 100644 --- a/generic/tclCmdAH.c +++ b/generic/tclCmdAH.c @@ -720,13 +720,14 @@ EncodingConvertfromObjCmd( * data as was converted. */ if (failVarObj) { - /* I hope, wide int will cover Tcl_Size data type */ + Tcl_Obj *failIndex; + TclNewIndexObj(failIndex, errorLocation); if (Tcl_ObjSetVar2(interp, failVarObj, NULL, - Tcl_NewWideIntObj(errorLocation), + failIndex, TCL_LEAVE_ERR_MSG) == NULL) { - Tcl_DStringFree(&ds); + Tcl_DStringFree(&ds); return TCL_ERROR; } } @@ -816,13 +817,14 @@ EncodingConverttoObjCmd( * data as was converted. */ if (failVarObj) { - /* I hope, wide int will cover Tcl_Size data type */ + Tcl_Obj *failIndex; + TclNewIndexObj(failIndex, errorLocation); if (Tcl_ObjSetVar2(interp, failVarObj, NULL, - Tcl_NewWideIntObj(errorLocation), + failIndex, TCL_LEAVE_ERR_MSG) == NULL) { - Tcl_DStringFree(&ds); + Tcl_DStringFree(&ds); return TCL_ERROR; } } @@ -2952,7 +2954,7 @@ EachloopCmd( result = TCL_ERROR; goto done; } - /* Don't compute values here, wait until the last momement */ + /* Don't compute values here, wait until the last moment */ statePtr->argcList[i] = TclArithSeriesObjLength(statePtr->aCopyList[i]); } else { /* List values */ -- cgit v0.12 From cc2747afa84298fa9e583a491f772524708e24a7 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Mon, 20 Mar 2023 12:08:58 +0000 Subject: Add longIs64bit constraint to lseq-4.4 (because this isn't expected to work on 32-bit platforms). Remove some unused constraints. --- tests/compExpr-old.test | 3 --- tests/execute.test | 1 - tests/expr-old.test | 1 - tests/expr.test | 1 - tests/lseq.test | 3 ++- 5 files changed, 2 insertions(+), 7 deletions(-) diff --git a/tests/compExpr-old.test b/tests/compExpr-old.test index 5f705c3..ec7eda1 100644 --- a/tests/compExpr-old.test +++ b/tests/compExpr-old.test @@ -78,9 +78,6 @@ proc testIEEE {} { } testConstraint ieeeFloatingPoint [testIEEE] -testConstraint longIs32bit [expr {$tcl_platform(wordSize) == 4}] -testConstraint longIs64bit [expr {$tcl_platform(wordSize) == 8}] - # procedures used below proc put_hello_char {c} { diff --git a/tests/execute.test b/tests/execute.test index 6d8ce99..90af21c 100644 --- a/tests/execute.test +++ b/tests/execute.test @@ -34,7 +34,6 @@ testConstraint testobj [expr { && [llength [info commands teststringobj]] }] -testConstraint longIs32bit [expr {$tcl_platform(wordSize) == 4}] testConstraint testexprlongobj [llength [info commands testexprlongobj]] diff --git a/tests/expr-old.test b/tests/expr-old.test index 7344e08..7274851 100644 --- a/tests/expr-old.test +++ b/tests/expr-old.test @@ -25,7 +25,6 @@ testConstraint testexprlong [llength [info commands testexprlong]] testConstraint testexprdouble [llength [info commands testexprdouble]] testConstraint testexprstring [llength [info commands testexprstring]] testConstraint longIs32bit [expr {$tcl_platform(wordSize) == 4}] -testConstraint longIs64bit [expr {$tcl_platform(wordSize) == 8}] # Big test for correct ordering of data in [expr] diff --git a/tests/expr.test b/tests/expr.test index a20aee1..15eff76 100644 --- a/tests/expr.test +++ b/tests/expr.test @@ -21,7 +21,6 @@ if {"::tcltest" ni [namespace children]} { # type is a 64 bit number on this machine. testConstraint longIs32bit [expr {$tcl_platform(wordSize) == 4}] -testConstraint longIs64bit [expr {$tcl_platform(wordSize) == 8}] testConstraint wideIs64bit [expr {wide(0x8000000000000000) < 0}] # Big test for correct ordering of data in [expr] diff --git a/tests/lseq.test b/tests/lseq.test index 3f68da4..a280069 100644 --- a/tests/lseq.test +++ b/tests/lseq.test @@ -17,6 +17,7 @@ if {"::tcltest" ni [namespace children]} { testConstraint arithSeriesDouble 1 testConstraint arithSeriesShimmer 1 testConstraint arithSeriesShimmerOk 1 +testConstraint longIs64bit [expr {$tcl_platform(wordSize) == 8}] ## Arg errors test lseq-1.1 {error cases} -body { @@ -482,7 +483,7 @@ test lseq-4.3 {TIP examples} { # # Ticket 9933cc4d88697f05976accebd31c1e3ba6efe9c6 - lseq corner case -test lseq-4.4 {lseq corner case} -body { +test lseq-4.4 {lseq corner case} -constraints longIs64bit -body { set tcmd { set res {} set s [catch {lindex [lseq 10 100] 0} e] -- cgit v0.12 From 5319a7d93f431d1921f3c93112027d79e43b988a Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Mon, 20 Mar 2023 14:18:47 +0000 Subject: Fix [bdcb5126c0]: Failed assertion in test chan-io-7.3 --- generic/tclEncoding.c | 3 ++- tests/chanio.test | 2 +- tests/io.test | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 35b74c7..93e4171 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -3424,7 +3424,8 @@ TableToUtfProc( } else if (PROFILE_REPLACE(flags)) { ch = UNICODE_REPLACE_CHAR; } else { - numChars++; /* Silently consume */ + src--; /* See bug [bdcb5126c0] */ + result = TCL_CONVERT_MULTIBYTE; break; } } else { diff --git a/tests/chanio.test b/tests/chanio.test index b73e681..d2008e6 100644 --- a/tests/chanio.test +++ b/tests/chanio.test @@ -1104,7 +1104,7 @@ test chan-io-7.3 {FilterInputBytes: split up character at EOF} -setup { lappend x [chan gets $f line] $line } -cleanup { chan close $f -} -result [list 15 "123456789012301" 17 1 1 -1 ""] +} -result [list 15 "123456789012301" 18 0 1 -1 ""] test chan-io-7.4 {FilterInputBytes: recover from split up character} -setup { variable x "" } -constraints {stdio fileevent} -body { diff --git a/tests/io.test b/tests/io.test index eb4abbd..c3c0cdd 100644 --- a/tests/io.test +++ b/tests/io.test @@ -1136,7 +1136,7 @@ test io-7.3 {FilterInputBytes: split up character at EOF} {testchannel} { lappend x [gets $f line] $line close $f set x -} [list 15 "123456789012301" 17 1 1 -1 ""] +} [list 15 "123456789012301" 18 0 1 -1 ""] test io-7.4 {FilterInputBytes: recover from split up character} {stdio fileevent} { set f [open "|[list [interpreter] $path(cat)]" w+] fconfigure $f -encoding binary -buffering none -- cgit v0.12 From 8133df3b6d12fd4fa798c7917979517d34f97996 Mon Sep 17 00:00:00 2001 From: dgp Date: Mon, 20 Mar 2023 16:39:06 +0000 Subject: Duplicate test name --- tests/ioCmd.test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/ioCmd.test b/tests/ioCmd.test index aeb9f87..61b3bdd 100644 --- a/tests/ioCmd.test +++ b/tests/ioCmd.test @@ -390,7 +390,7 @@ test iocmd-8.22 {fconfigure command / -nocomplainencoding 0, no error if -strict } -result 0 -test iocmd-8.21 {fconfigure -profile badprofile} -body { +test iocmd-8.23 {fconfigure -profile badprofile} -body { fconfigure stdin -profile froboz } -returnCodes error -result {bad profile name "froboz": must be replace, strict, or tcl8} -- cgit v0.12 From 88f18252321544193f8c2aae0eb23f43da96968b Mon Sep 17 00:00:00 2001 From: dgp Date: Mon, 20 Mar 2023 18:45:28 +0000 Subject: Candidate fix for [f3cb2a32d6] Add initialization to allocation of string rep buffer to resolve valgrind reports on use of uninitialized memory --- generic/tclStringObj.c | 1 + 1 file changed, 1 insertion(+) diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c index 328e410..322aed5 100644 --- a/generic/tclStringObj.c +++ b/generic/tclStringObj.c @@ -339,6 +339,7 @@ GrowStringBuffer( } objPtr->bytes = ptr; stringPtr->allocated = attempt; + memset(ptr + objPtr->length, 0, attempt + 1U - objPtr->length); } static void -- cgit v0.12 From 28add0c150860d7b0ddbee462e76b17aa2089862 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Mon, 20 Mar 2023 21:06:09 +0000 Subject: More TclNewIndexObj() usages (for values between -1 and SIZE_MAX-1) --- generic/tcl.decls | 2 +- generic/tcl.h | 2 +- generic/tclBasic.c | 35 ++++++++++++++++++----------------- generic/tclEncoding.c | 14 +++++++------- generic/tclIOCmd.c | 5 ++++- tests/icuUcmTests.tcl | 2 +- 6 files changed, 32 insertions(+), 28 deletions(-) diff --git a/generic/tcl.decls b/generic/tcl.decls index 1608a88..6d0cd3e 100644 --- a/generic/tcl.decls +++ b/generic/tcl.decls @@ -2506,7 +2506,7 @@ declare 658 { int Tcl_ExternalToUtfDStringEx(Tcl_Interp *interp, Tcl_Encoding encoding, const char *src, Tcl_Size srcLen, int flags, Tcl_DString *dsPtr, Tcl_Size *errorLocationPtr) -} +} declare 659 { int Tcl_UtfToExternalDStringEx(Tcl_Interp *interp, Tcl_Encoding encoding, const char *src, Tcl_Size srcLen, int flags, Tcl_DString *dsPtr, diff --git a/generic/tcl.h b/generic/tcl.h index 94a4c9b..ae0e19e 100644 --- a/generic/tcl.h +++ b/generic/tcl.h @@ -1947,7 +1947,7 @@ typedef struct Tcl_EncodingType { * TCL_ENCODING_PROFILE_* - Mutually exclusive encoding profile ids. Note * these are bit masks. * - * NOTE: THESE BIT DEFINITIONS SHOULD NOT OVERLAP WITH INTERNAL USE BITS + * NOTE: THESE BIT DEFINITIONS SHOULD NOT OVERLAP WITH INTERNAL USE BITS * DEFINED IN tclEncoding.c (ENCODING_INPUT et al). Be cognizant of this * when adding bits. */ diff --git a/generic/tclBasic.c b/generic/tclBasic.c index 381d127..9b1b5a5 100644 --- a/generic/tclBasic.c +++ b/generic/tclBasic.c @@ -9554,24 +9554,25 @@ InjectHandler( Tcl_Obj **objv; if (!isProbe) { - /* - * If this is [coroinject], add the extra arguments now. - */ - - if (nargs == COROUTINE_ARGUMENTS_SINGLE_OPTIONAL) { - Tcl_ListObjAppendElement(NULL, listPtr, - Tcl_NewStringObj("yield", TCL_INDEX_NONE)); - } else if (nargs == COROUTINE_ARGUMENTS_ARBITRARY) { - Tcl_ListObjAppendElement(NULL, listPtr, - Tcl_NewStringObj("yieldto", TCL_INDEX_NONE)); - } else { - /* - * I don't think this is reachable... - */ + /* + * If this is [coroinject], add the extra arguments now. + */ - Tcl_ListObjAppendElement(NULL, listPtr, Tcl_NewWideIntObj((Tcl_WideInt)(nargs + 1U) - 1)); - } - Tcl_ListObjAppendElement(NULL, listPtr, Tcl_GetObjResult(interp)); + if (nargs == COROUTINE_ARGUMENTS_SINGLE_OPTIONAL) { + Tcl_ListObjAppendElement(NULL, listPtr, + Tcl_NewStringObj("yield", TCL_INDEX_NONE)); + } else if (nargs == COROUTINE_ARGUMENTS_ARBITRARY) { + Tcl_ListObjAppendElement(NULL, listPtr, + Tcl_NewStringObj("yieldto", TCL_INDEX_NONE)); + } else { + /* + * I don't think this is reachable... + */ + Tcl_Obj *nargsObj; + TclNewIndexObj(nargsObj, nargs); + Tcl_ListObjAppendElement(NULL, listPtr, nargsObj); + } + Tcl_ListObjAppendElement(NULL, listPtr, Tcl_GetObjResult(interp)); } /* diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index f9a5e6a..1e76dc4 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -553,11 +553,11 @@ FillEncodingFileMap(void) *--------------------------------------------------------------------------- */ -/* - * NOTE: THESE BIT DEFINITIONS SHOULD NOT OVERLAP WITH INTERNAL USE BITS +/* + * NOTE: THESE BIT DEFINITIONS SHOULD NOT OVERLAP WITH INTERNAL USE BITS * DEFINED IN tcl.h (TCL_ENCODING_* et al). Be cognizant of this * when adding bits. TODO - should really be defined in a single file. - * + * * To prevent conflicting bits, only define bits within 0xff00 mask here. */ #define TCL_ENCODING_LE 0x100 /* Used to distinguish LE/BE variants */ @@ -2815,7 +2815,7 @@ Utf32ToUtfProc( break; } else if (PROFILE_REPLACE(flags) && SURROGATE(ch)) { ch = UNICODE_REPLACE_CHAR; - } + } /* * Special case for 1-byte utf chars for speed. Make sure we work with @@ -3060,7 +3060,7 @@ Utf16ToUtfProc( numChars--; break; } else if (PROFILE_REPLACE(flags)) { - /* + /* * Previous loop wrote a single byte to mark the high surrogate. * Replace it with the replacement character. Further, restart * current loop iteration since need to recheck destination space @@ -4509,7 +4509,7 @@ TclEncodingProfileNameToId( if (interp) { Tcl_Obj *errorObj; /* This code assumes at least two profiles :-) */ - errorObj = + errorObj = Tcl_ObjPrintf("bad profile name \"%s\": must be", profileName); for (i = 0; i < (numProfiles - 1); ++i) { @@ -4576,7 +4576,7 @@ TclEncodingProfileIdToName( * is mapped to the TCL_ENCODING_PROFILE_STRICT overwriting any profile * specified. * - * If no profile or an invalid profile is specified, it is set to + * If no profile or an invalid profile is specified, it is set to * the default. * * Results: diff --git a/generic/tclIOCmd.c b/generic/tclIOCmd.c index 6ec5891..cdb8083 100644 --- a/generic/tclIOCmd.c +++ b/generic/tclIOCmd.c @@ -10,6 +10,7 @@ */ #include "tclInt.h" +#include "tclTomMath.h" /* * Callback structure for accept callback in a TCP server. @@ -330,7 +331,9 @@ Tcl_GetsObjCmd( code = TCL_ERROR; goto done; } - Tcl_SetObjResult(interp, Tcl_NewWideIntObj((Tcl_WideInt)((Tcl_WideUInt)(lineLen + 1U)) - 1)); + Tcl_Obj *lineLenObj; + TclNewIndexObj(lineLenObj, lineLen); + Tcl_SetObjResult(interp, lineLenObj); } else { Tcl_SetObjResult(interp, linePtr); } diff --git a/tests/icuUcmTests.tcl b/tests/icuUcmTests.tcl index 0c4071f..3b70748 100644 --- a/tests/icuUcmTests.tcl +++ b/tests/icuUcmTests.tcl @@ -45,7 +45,7 @@ if {[info commands printable] eq ""} { return $print } } - + # # cp1250 (generated from glibc-CP1250-2.1.2) -- cgit v0.12 From 4ea926a40b6c03000b32b4765503b75a3909dbc0 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Mon, 20 Mar 2023 22:53:33 +0000 Subject: Proposed fix for [154ed7ce56]: Tcl 9: [gets] on -strictencoding 1 configured channel. Extracted from TIP #657 branch (better keeping bug-fix separate from enhancements) --- generic/tclIO.c | 26 +++++++++++++++++++------- generic/tclIO.h | 2 -- tests/io.test | 20 ++++++++++++++++++-- 3 files changed, 37 insertions(+), 11 deletions(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index 3f7fe86..9944787 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -4914,6 +4914,19 @@ Tcl_GetsObj( goto done; } goto gotEOL; + } else if (gs.bytesWrote == 0 + && GotFlag(statePtr, CHANNEL_ENCODING_ERROR) + && !GotFlag(statePtr, CHANNEL_NONBLOCKING)) { + /* Set eol to the position that caused the encoding error, and then + * coninue to gotEOL, which stores the data that was decoded + * without error to objPtr. This allows the caller to do something + * useful with the data decoded so far, and also results in the + * position of the file being the first byte that was not + * succesfully decoded, allowing further processing at exactly that + * point, if desired. + */ + eol = dstEnd; + goto gotEOL; } dst = dstEnd; } @@ -5030,6 +5043,11 @@ Tcl_GetsObj( } UpdateInterest(chanPtr); TclChannelRelease((Tcl_Channel)chanPtr); + if (GotFlag(statePtr, CHANNEL_ENCODING_ERROR) && + (copiedTotal == 0 || !GotFlag(statePtr, CHANNEL_NONBLOCKING))) { + Tcl_SetErrno(EILSEQ); + copiedTotal = -1; + } return copiedTotal; } @@ -7534,8 +7552,7 @@ Tcl_Eof( ChannelState *statePtr = ((Channel *) chan)->state; /* State of real channel structure. */ - if (GotFlag(statePtr, CHANNEL_NONBLOCKING|CHANNEL_FCOPY) - && GotFlag(statePtr, CHANNEL_ENCODING_ERROR)) { + if (GotFlag(statePtr, CHANNEL_ENCODING_ERROR)) { return 0; } return GotFlag(statePtr, CHANNEL_EOF) ? 1 : 0; @@ -9751,7 +9768,6 @@ CopyData( * the bottom of the stack. */ - SetFlag(inStatePtr, CHANNEL_FCOPY); inBinary = (inStatePtr->encoding == NULL); outBinary = (outStatePtr->encoding == NULL); sameEncoding = inStatePtr->encoding == outStatePtr->encoding @@ -9867,7 +9883,6 @@ CopyData( TclDecrRefCount(bufObj); bufObj = NULL; } - ResetFlag(inStatePtr, CHANNEL_FCOPY); return TCL_OK; } } @@ -9959,7 +9974,6 @@ CopyData( TclDecrRefCount(bufObj); bufObj = NULL; } - ResetFlag(inStatePtr, CHANNEL_FCOPY); return TCL_OK; } @@ -9982,7 +9996,6 @@ CopyData( TclDecrRefCount(bufObj); bufObj = NULL; } - ResetFlag(inStatePtr, CHANNEL_FCOPY); return TCL_OK; } } /* while */ @@ -10035,7 +10048,6 @@ CopyData( } } } - ResetFlag(inStatePtr, CHANNEL_FCOPY); return result; } diff --git a/generic/tclIO.h b/generic/tclIO.h index 109c770..cdd96ff 100644 --- a/generic/tclIO.h +++ b/generic/tclIO.h @@ -236,8 +236,6 @@ typedef struct ChannelState { * flushed after every newline. */ #define CHANNEL_UNBUFFERED (1<<5) /* Output to the channel must always * be flushed immediately. */ -#define CHANNEL_FCOPY (1<<6) /* Channel is currently doing an fcopy - * mode. */ #define BG_FLUSH_SCHEDULED (1<<7) /* A background flush of the queued * output buffers has been * scheduled. */ diff --git a/tests/io.test b/tests/io.test index c3c0cdd..cf90936 100644 --- a/tests/io.test +++ b/tests/io.test @@ -9155,6 +9155,22 @@ test io-75.5 {invalid utf-8 encoding read is ignored (-profile tcl8)} -setup { removeFile io-75.5 } -result 4181 +test io-75.6 {invalid utf-8 encoding gets is not ignored (-profile strict)} -setup { + set fn [makeFile {} io-75.6] + set f [open $fn w+] + fconfigure $f -encoding binary + # \x81 is invalid in utf-8 + puts -nonewline $f A\x81 + flush $f + seek $f 0 + fconfigure $f -encoding utf-8 -buffering none -eofchar "" -translation lf -profile strict +} -body { + gets $f +} -cleanup { + close $f + removeFile io-75.6 +} -match glob -returnCodes 1 -result {error reading "*": illegal byte sequence} + test io-75.8 {invalid utf-8 encoding eof handling (-profile strict)} -setup { set fn [makeFile {} io-75.8] set f [open $fn w+] @@ -9243,10 +9259,10 @@ test io-75.12 {invalid utf-8 encoding read is ignored} -setup { fconfigure $f -encoding utf-8 -buffering none -eofchar "" -translation lf } -body { set d [read $f] - close $f binary scan $d H* hd set hd } -cleanup { + close $f removeFile io-75.12 } -result 4181 test io-75.13 {invalid utf-8 encoding read is not ignored (-profile strict)} -setup { @@ -9262,9 +9278,9 @@ test io-75.13 {invalid utf-8 encoding read is not ignored (-profile strict)} -se set d [read $f] binary scan $d H* hd lappend hd [catch {read $f} msg] - close $f lappend hd $msg } -cleanup { + close $f removeFile io-75.13 } -match glob -result {41 1 {error reading "*": illegal byte sequence}} -- cgit v0.12 From f5c47e4402864aa6d6f5f120c231c39423dcc360 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Mon, 20 Mar 2023 23:17:11 +0000 Subject: Proposed fix for [1bedc53c8c]: synchronous [read] with -strictencoding does not produce an error on invalid input --- generic/tclIO.c | 28 +++++++++++++++++++++++++++- tests/io.test | 16 ++++++++++++++++ 2 files changed, 43 insertions(+), 1 deletion(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index 9944787..7f74e2e 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -6078,6 +6078,23 @@ DoReadChars( statePtr->inQueueTail = NULL; } } + + /* + * If CHANNEL_ENCODING_ERROR and CHANNEL_STICKY_EOF are both set, + * then CHANNEL_ENCODING_ERROR was caused by data that occurred + * after the EOF character was encountered, so it doesn't count as + * a real error. + */ + + if (GotFlag(statePtr, CHANNEL_ENCODING_ERROR) + && !GotFlag(statePtr, CHANNEL_STICKY_EOF) + && !GotFlag(statePtr, CHANNEL_NONBLOCKING)) { + /* Channel is blocking. Return an error so that callers + * like [read] can return an error. + */ + Tcl_SetErrno(EILSEQ); + goto finish; + } } if (copiedNow < 0) { @@ -6106,6 +6123,7 @@ DoReadChars( } } +finish: /* * Failure to fill a channel buffer may have left channel reporting a * "blocked" state, but so long as we fulfilled the request here, the @@ -6139,6 +6157,11 @@ DoReadChars( assert(!(GotFlag(statePtr, CHANNEL_EOF|CHANNEL_BLOCKED) == (CHANNEL_EOF|CHANNEL_BLOCKED))); UpdateInterest(chanPtr); + if (GotFlag(statePtr, CHANNEL_ENCODING_ERROR) + && (!copied || !GotFlag(statePtr, CHANNEL_NONBLOCKING))) { + Tcl_SetErrno(EILSEQ); + copied = -1; + } TclChannelRelease((Tcl_Channel)chanPtr); return copied; } @@ -6769,11 +6792,14 @@ TranslateInputEOL( * EOF character was seen in EOL translated range. Leave current file * position pointing at the EOF character, but don't store the EOF * character in the output string. + * + * If CHANNEL_ENCODING_ERROR is set, it can only be because of data + * encountered after the EOF character, so it is nonsense. Unset it. */ SetFlag(statePtr, CHANNEL_EOF | CHANNEL_STICKY_EOF); statePtr->inputEncodingFlags |= TCL_ENCODING_END; - ResetFlag(statePtr, CHANNEL_BLOCKED|INPUT_SAW_CR); + ResetFlag(statePtr, CHANNEL_BLOCKED|INPUT_SAW_CR|CHANNEL_ENCODING_ERROR); } } diff --git a/tests/io.test b/tests/io.test index cf90936..9246bd8 100644 --- a/tests/io.test +++ b/tests/io.test @@ -9171,6 +9171,22 @@ test io-75.6 {invalid utf-8 encoding gets is not ignored (-profile strict)} -set removeFile io-75.6 } -match glob -returnCodes 1 -result {error reading "*": illegal byte sequence} +test io-75.7 {invalid utf-8 encoding gets is not ignored (-profile strict)} -setup { + set fn [makeFile {} io-75.7] + set f [open $fn w+] + fconfigure $f -encoding binary + # \x81 is invalid in utf-8 + puts -nonewline $f A\x81 + flush $f + seek $f 0 + fconfigure $f -encoding utf-8 -buffering none -eofchar "" -translation lf -profile strict +} -body { + read $f +} -cleanup { + close $f + removeFile io-75.7 +} -match glob -returnCodes 1 -result {error reading "*": illegal byte sequence} + test io-75.8 {invalid utf-8 encoding eof handling (-profile strict)} -setup { set fn [makeFile {} io-75.8] set f [open $fn w+] -- cgit v0.12 From 6ff9d841db20402b12687f25521cedbed683deca Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Tue, 21 Mar 2023 08:40:08 +0000 Subject: Missing backslashes in macro def --- generic/tclInt.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/generic/tclInt.h b/generic/tclInt.h index 6aa05a8..50d992c 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -4912,13 +4912,13 @@ MODULE_SCOPE Tcl_LibraryInitProc Procbodytest_SafeInit; } while (0) #define TclNewIndexObj(objPtr, w) \ - do { - Tcl_WideUInt _uw = (Tcl_WideUInt)w; - if (_uw >= TCL_INDEX_NONE) { - TclNewIntObj(objPtr, -1); - } else { - TclNewUIntObj(objPtr, _uw); - } + do { \ + Tcl_WideUInt _uw = (Tcl_WideUInt)(w); \ + if (_uw >= TCL_INDEX_NONE) { \ + TclNewIntObj(objPtr, -1); \ + } else { \ + TclNewUIntObj(objPtr, _uw); \ + } \ } while (0) #define TclNewDoubleObj(objPtr, d) \ -- cgit v0.12 From 48dcbfcc5b65ce91d157d0faa2db21f6035879e9 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Tue, 21 Mar 2023 11:11:02 +0000 Subject: Some test-cases, which test for partial read without throwing EILSEQ immediately, only work with ""-blocking 0". That's expected. --- tests/io.test | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/io.test b/tests/io.test index 9246bd8..58d276b 100644 --- a/tests/io.test +++ b/tests/io.test @@ -9110,10 +9110,10 @@ test io-75.3 {incomplete multibyte encoding read is ignored (-profile tcl8)} -se fconfigure $f -encoding utf-8 -buffering none -profile tcl8 } -body { set d [read $f] - close $f binary scan $d H* hd set hd } -cleanup { + close $f removeFile io-75.3 } -result 41c0 @@ -9148,10 +9148,10 @@ test io-75.5 {invalid utf-8 encoding read is ignored (-profile tcl8)} -setup { fconfigure $f -encoding utf-8 -buffering none -eofchar "" -translation lf -profile tcl8 } -body { set d [read $f] - close $f binary scan $d H* hd set hd } -cleanup { + close $f removeFile io-75.5 } -result 4181 @@ -9234,10 +9234,10 @@ test io-75.10 {incomplete multibyte encoding read is ignored} -setup { fconfigure $f -encoding utf-8 -buffering none } -body { set d [read $f] - close $f binary scan $d H* hd set hd } -cleanup { + close $f removeFile io-75.10 } -result 41c0 # The current result returns the orphan byte as byte. @@ -9254,7 +9254,7 @@ test io-75.11 {shiftjis encoding error read results in raw bytes} -setup { puts -nonewline $f A\x81\xFFA flush $f seek $f 0 - fconfigure $f -encoding shiftjis -buffering none -eofchar "" -translation lf -profile strict + fconfigure $f -encoding shiftjis -blocking 0 -eofchar "" -translation lf -profile strict } -body { set d [read $f] binary scan $d H* hd @@ -9289,7 +9289,7 @@ test io-75.13 {invalid utf-8 encoding read is not ignored (-profile strict)} -se puts -nonewline $f "A\x81" flush $f seek $f 0 - fconfigure $f -encoding utf-8 -buffering none -eofchar "" -translation lf -profile strict + fconfigure $f -encoding utf-8 -blocking 0 -eofchar "" -translation lf -profile strict } -body { set d [read $f] binary scan $d H* hd -- cgit v0.12 From d63d524e1d45f80c027a4a10aa4f2a51fd8e3f04 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Tue, 21 Mar 2023 16:26:30 +0000 Subject: Fix indenting. More use of TCL_INDEX_NONE --- generic/tclEncoding.c | 282 ++++++++++++++++++++++++-------------------------- 1 file changed, 136 insertions(+), 146 deletions(-) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 93e4171..7c04a61 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -801,7 +801,7 @@ Tcl_SetDefaultEncodingDir( const char *path) { Tcl_Obj *searchPath = Tcl_GetEncodingSearchPath(); - Tcl_Obj *directory = Tcl_NewStringObj(path, -1); + Tcl_Obj *directory = Tcl_NewStringObj(path, TCL_INDEX_NONE); searchPath = Tcl_DuplicateObj(searchPath); Tcl_ListObjReplace(NULL, searchPath, 0, 0, 1, &directory); @@ -997,7 +997,7 @@ Tcl_GetEncodingNames( Encoding *encodingPtr = (Encoding *)Tcl_GetHashValue(hPtr); Tcl_CreateHashEntry(&table, - Tcl_NewStringObj(encodingPtr->name, -1), &dummy); + Tcl_NewStringObj(encodingPtr->name, TCL_INDEX_NONE), &dummy); } Tcl_MutexUnlock(&encodingMutex); @@ -1261,7 +1261,7 @@ Tcl_ExternalToUtfDString( *------------------------------------------------------------------------- */ -Tcl_Size +int Tcl_ExternalToUtfDStringEx( Tcl_Interp *interp, /* For error messages. May be NULL. */ Tcl_Encoding encoding, /* The encoding for the source string, or NULL @@ -1279,8 +1279,8 @@ Tcl_ExternalToUtfDStringEx( char *dst; Tcl_EncodingState state; const Encoding *encodingPtr; - int result, soFar, srcRead, dstWrote, dstChars; - Tcl_Size dstLen; + int result, srcRead, dstWrote, dstChars; + Tcl_Size dstLen, soFar; const char *srcStart = src; /* DO FIRST - Must always be initialized before returning */ @@ -1292,7 +1292,7 @@ Tcl_ExternalToUtfDStringEx( interp, Tcl_NewStringObj( "Parameter error: TCL_ENCODING_{START,STOP} bits set in flags.", - -1)); + TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "ENCODING", "ILLEGALFLAGS", NULL); return TCL_ERROR; } @@ -1301,7 +1301,7 @@ Tcl_ExternalToUtfDStringEx( dstLen = dstPtr->spaceAvl - 1; if (encoding == NULL) { - encoding = systemEncoding; + encoding = systemEncoding; } encodingPtr = (Encoding *)encoding; @@ -1317,50 +1317,49 @@ Tcl_ExternalToUtfDStringEx( } while (1) { - result = encodingPtr->toUtfProc(encodingPtr->clientData, src, - srcLen, flags, &state, dst, dstLen, - &srcRead, &dstWrote, &dstChars); - soFar = dst + dstWrote - Tcl_DStringValue(dstPtr); - - src += srcRead; - if (result != TCL_CONVERT_NOSPACE) { - Tcl_Size nBytesProcessed = (src - srcStart); - - Tcl_DStringSetLength(dstPtr, soFar); - if (errorLocPtr) { - /* - * Do not write error message into interpreter if caller - * wants to know error location. - */ - *errorLocPtr = result == TCL_OK ? TCL_INDEX_NONE : nBytesProcessed; - } - else { - /* Caller wants error message on failure */ - if (result != TCL_OK && interp != NULL) { - char buf[TCL_INTEGER_SPACE]; - sprintf(buf, "%u", nBytesProcessed); - Tcl_SetObjResult( - interp, - Tcl_ObjPrintf("unexpected byte sequence starting at index %" - "u: '\\x%02X'", - nBytesProcessed, - UCHAR(srcStart[nBytesProcessed]))); - Tcl_SetErrorCode( - interp, "TCL", "ENCODING", "ILLEGALSEQUENCE", buf, NULL); - } - } - return result; - } - - /* Expand space and continue */ - flags &= ~TCL_ENCODING_START; - srcLen -= srcRead; - if (Tcl_DStringLength(dstPtr) == 0) { - Tcl_DStringSetLength(dstPtr, dstLen); - } - Tcl_DStringSetLength(dstPtr, 2 * Tcl_DStringLength(dstPtr) + 1); - dst = Tcl_DStringValue(dstPtr) + soFar; - dstLen = Tcl_DStringLength(dstPtr) - soFar - 1; + result = encodingPtr->toUtfProc(encodingPtr->clientData, src, + srcLen, flags, &state, dst, dstLen, + &srcRead, &dstWrote, &dstChars); + soFar = dst + dstWrote - Tcl_DStringValue(dstPtr); + + src += srcRead; + if (result != TCL_CONVERT_NOSPACE) { + Tcl_Size nBytesProcessed = (src - srcStart); + + Tcl_DStringSetLength(dstPtr, soFar); + if (errorLocPtr) { + /* + * Do not write error message into interpreter if caller + * wants to know error location. + */ + *errorLocPtr = result == TCL_OK ? TCL_INDEX_NONE : nBytesProcessed; + } else { + /* Caller wants error message on failure */ + if (result != TCL_OK && interp != NULL) { + char buf[TCL_INTEGER_SPACE]; + sprintf(buf, "%u", nBytesProcessed); + Tcl_SetObjResult( + interp, + Tcl_ObjPrintf("unexpected byte sequence starting at index %" + "u: '\\x%02X'", + nBytesProcessed, + UCHAR(srcStart[nBytesProcessed]))); + Tcl_SetErrorCode( + interp, "TCL", "ENCODING", "ILLEGALSEQUENCE", buf, NULL); + } + } + return result; + } + + /* Expand space and continue */ + flags &= ~TCL_ENCODING_START; + srcLen -= srcRead; + if (Tcl_DStringLength(dstPtr) == 0) { + Tcl_DStringSetLength(dstPtr, dstLen); + } + Tcl_DStringSetLength(dstPtr, 2 * Tcl_DStringLength(dstPtr) + 1); + dst = Tcl_DStringValue(dstPtr) + soFar; + dstLen = Tcl_DStringLength(dstPtr) - soFar - 1; } } @@ -1447,9 +1446,9 @@ Tcl_ExternalToUtf( } if (!noTerminate) { - if (dstLen < 1) { - return TCL_CONVERT_NOSPACE; - } + if (dstLen < 1) { + return TCL_CONVERT_NOSPACE; + } /* * If there are any null characters in the middle of the buffer, * they will converted to the UTF-8 null character (\xC0\x80). To get @@ -1459,9 +1458,9 @@ Tcl_ExternalToUtf( dstLen--; } else { - if (dstLen < 0) { - return TCL_CONVERT_NOSPACE; - } + if (dstLen < 0) { + return TCL_CONVERT_NOSPACE; + } } if (encodingPtr->toUtfProc == UtfToUtfProc) { flags |= ENCODING_INPUT; @@ -1518,7 +1517,7 @@ Tcl_UtfToExternalDString( * converted string is stored. */ { Tcl_UtfToExternalDStringEx( - NULL, encoding, src, srcLen, TCL_ENCODING_PROFILE_DEFAULT, dstPtr, NULL); + NULL, encoding, src, srcLen, TCL_ENCODING_PROFILE_TCL8, dstPtr, NULL); return Tcl_DStringValue(dstPtr); } @@ -1562,7 +1561,7 @@ Tcl_UtfToExternalDString( *------------------------------------------------------------------------- */ -Tcl_Size +int Tcl_UtfToExternalDStringEx( Tcl_Interp *interp, /* For error messages. May be NULL. */ Tcl_Encoding encoding, /* The encoding for the converted string, or @@ -1580,9 +1579,9 @@ Tcl_UtfToExternalDStringEx( char *dst; Tcl_EncodingState state; const Encoding *encodingPtr; - int result, soFar, srcRead, dstWrote, dstChars; + int result, srcRead, dstWrote, dstChars; const char *srcStart = src; - Tcl_Size dstLen; + Tcl_Size dstLen, soFar; /* DO FIRST - must always be initialized on return */ Tcl_DStringInit(dstPtr); @@ -1593,7 +1592,7 @@ Tcl_UtfToExternalDStringEx( interp, Tcl_NewStringObj( "Parameter error: TCL_ENCODING_{START,STOP} bits set in flags.", - -1)); + TCL_INDEX_NONE)); Tcl_SetErrorCode(interp, "TCL", "ENCODING", "ILLEGALFLAGS", NULL); return TCL_ERROR; } @@ -1615,32 +1614,31 @@ Tcl_UtfToExternalDStringEx( flags |= TCL_ENCODING_START | TCL_ENCODING_END; while (1) { result = encodingPtr->fromUtfProc(encodingPtr->clientData, src, - srcLen, flags, &state, dst, dstLen, - &srcRead, &dstWrote, &dstChars); + srcLen, flags, &state, dst, dstLen, + &srcRead, &dstWrote, &dstChars); soFar = dst + dstWrote - Tcl_DStringValue(dstPtr); src += srcRead; if (result != TCL_CONVERT_NOSPACE) { - Tcl_Size nBytesProcessed = (src - srcStart); + Tcl_Size nBytesProcessed = (src - srcStart); int i = soFar + encodingPtr->nullSize - 1; while (i >= soFar) { Tcl_DStringSetLength(dstPtr, i--); } - if (errorLocPtr) { - /* - * Do not write error message into interpreter if caller - * wants to know error location. - */ - *errorLocPtr = result == TCL_OK ? TCL_INDEX_NONE : nBytesProcessed; - } - else { - /* Caller wants error message on failure */ - if (result != TCL_OK && interp != NULL) { - int pos = Tcl_NumUtfChars(srcStart, nBytesProcessed); - int ucs4; - char buf[TCL_INTEGER_SPACE]; - TclUtfToUCS4(&srcStart[nBytesProcessed], &ucs4); - sprintf(buf, "%u", nBytesProcessed); + if (errorLocPtr) { + /* + * Do not write error message into interpreter if caller + * wants to know error location. + */ + *errorLocPtr = result == TCL_OK ? TCL_INDEX_NONE : nBytesProcessed; + } else { + /* Caller wants error message on failure */ + if (result != TCL_OK && interp != NULL) { + int pos = Tcl_NumUtfChars(srcStart, nBytesProcessed); + int ucs4; + char buf[TCL_INTEGER_SPACE]; + TclUtfToUCS4(&srcStart[nBytesProcessed], &ucs4); + sprintf(buf, "%u", nBytesProcessed); Tcl_SetObjResult( interp, Tcl_ObjPrintf( @@ -1648,10 +1646,10 @@ Tcl_UtfToExternalDStringEx( pos, ucs4)); Tcl_SetErrorCode(interp, "TCL", "ENCODING", "ILLEGALSEQUENCE", - buf, NULL); - } - } - return result; + buf, NULL); + } + } + return result; } flags &= ~TCL_ENCODING_START; @@ -1742,7 +1740,7 @@ Tcl_UtfToExternal( } if (dstLen < encodingPtr->nullSize) { - return TCL_CONVERT_NOSPACE; + return TCL_CONVERT_NOSPACE; } dstLen -= encodingPtr->nullSize; result = encodingPtr->fromUtfProc(encodingPtr->clientData, src, srcLen, @@ -1811,7 +1809,7 @@ OpenEncodingFileChannel( const char *name) /* The name of the encoding file on disk and * also the name for new encoding. */ { - Tcl_Obj *nameObj = Tcl_NewStringObj(name, -1); + Tcl_Obj *nameObj = Tcl_NewStringObj(name, TCL_INDEX_NONE); Tcl_Obj *fileNameObj = Tcl_DuplicateObj(nameObj); Tcl_Obj *searchPath = Tcl_DuplicateObj(Tcl_GetEncodingSearchPath()); Tcl_Obj *map = TclGetProcessGlobalValue(&encodingFileMap); @@ -1821,7 +1819,7 @@ OpenEncodingFileChannel( TclListObjGetElementsM(NULL, searchPath, &numDirs, &dir); Tcl_IncrRefCount(nameObj); - Tcl_AppendToObj(fileNameObj, ".enc", -1); + Tcl_AppendToObj(fileNameObj, ".enc", TCL_INDEX_NONE); Tcl_IncrRefCount(fileNameObj); Tcl_DictObjGet(NULL, map, nameObj, &directory); @@ -2551,19 +2549,16 @@ UtfToUtfProc( } if (UCHAR(*src) < 0x80 && !((UCHAR(*src) == 0) && (flags & ENCODING_INPUT))) { /* - * Copy 7bit characters, but skip null-bytes when target encoding - * is Tcl's "modified" UTF-8. These need to be converted to - * \xC0\x80 as is done in a later branch. + * Copy 7bit characters, but skip null-bytes when we are in input + * mode, so that they get converted to \xC0\x80. */ - *dst++ = *src++; - } - else if ((UCHAR(*src) == 0xC0) && (src + 1 < srcEnd) && + } else if ((UCHAR(*src) == 0xC0) && (src + 1 < srcEnd) && (UCHAR(src[1]) == 0x80) && (!(flags & ENCODING_INPUT) || PROFILE_STRICT(profile) || PROFILE_REPLACE(profile))) { /* Special sequence \xC0\x80 */ - if ((PROFILE_STRICT(profile) || PROFILE_REPLACE(profile)) && (flags & ENCODING_INPUT)) { + if ((PROFILE_STRICT(profile) || PROFILE_REPLACE(profile)) && (flags & ENCODING_INPUT)) { if (PROFILE_REPLACE(profile)) { dst += Tcl_UniCharToUtf(UNICODE_REPLACE_CHAR, dst); src += 2; @@ -2581,8 +2576,7 @@ UtfToUtfProc( src += 2; } - } - else if (!Tcl_UtfCharComplete(src, srcEnd - src)) { + } else if (!Tcl_UtfCharComplete(src, srcEnd - src)) { /* * Incomplete byte sequence. * Always check before using TclUtfToUCS4. Not doing can so @@ -2599,34 +2593,32 @@ UtfToUtfProc( : TCL_CONVERT_SYNTAX; break; } - } - if (PROFILE_REPLACE(profile)) { - ch = UNICODE_REPLACE_CHAR; - ++src; - } else { - /* TCL_ENCODING_PROFILE_TCL8 */ - char chbuf[2]; - chbuf[0] = UCHAR(*src++); chbuf[1] = 0; - TclUtfToUCS4(chbuf, &ch); - } + } + if (PROFILE_REPLACE(profile)) { + ch = UNICODE_REPLACE_CHAR; + ++src; + } else { + /* TCL_ENCODING_PROFILE_TCL8 */ + char chbuf[2]; + chbuf[0] = UCHAR(*src++); chbuf[1] = 0; + TclUtfToUCS4(chbuf, &ch); + } dst += Tcl_UniCharToUtf(ch, dst); - } - else { + } else { int low; - int isInvalid = 0; + int isInvalid = 0; size_t len = TclUtfToUCS4(src, &ch); if (flags & ENCODING_INPUT) { if ((len < 2) && (ch != 0)) { - isInvalid = 1; + isInvalid = 1; } else if ((ch > 0xFFFF) && !(flags & ENCODING_UTF)) { - isInvalid = 1; + isInvalid = 1; } if (isInvalid) { if (PROFILE_STRICT(profile)) { result = TCL_CONVERT_SYNTAX; break; - } - else if (PROFILE_REPLACE(profile)) { + } else if (PROFILE_REPLACE(profile)) { ch = UNICODE_REPLACE_CHAR; } } @@ -2655,8 +2647,7 @@ UtfToUtfProc( } if (PROFILE_REPLACE(profile)) { ch = UNICODE_REPLACE_CHAR; - } - else { + } else { low = ch; len = (src <= srcEnd - 3) ? TclUtfToUCS4(src, &low) : 0; @@ -2684,7 +2675,7 @@ cesu8: src = saveSrc; break; } else if (PROFILE_STRICT(profile) && - (flags & ENCODING_INPUT) && + (flags & ENCODING_INPUT) && SURROGATE(ch)) { result = TCL_CONVERT_SYNTAX; src = saveSrc; @@ -2755,7 +2746,7 @@ Utf32ToUtfProc( * Check alignment with utf-32 (4 == sizeof(UTF-32)) */ if (bytesLeft != 0) { - /* We have a truncated code unit */ + /* We have a truncated code unit */ result = TCL_CONVERT_MULTIBYTE; srcLen -= bytesLeft; } @@ -2832,21 +2823,21 @@ Utf32ToUtfProc( } if ((flags & TCL_ENCODING_END) && (result == TCL_CONVERT_MULTIBYTE)) { - /* We have a code fragment left-over at the end */ + /* We have a code fragment left-over at the end */ if (dst > dstEnd) { result = TCL_CONVERT_NOSPACE; } else { /* destination is not full, so we really are at the end now */ - if (PROFILE_STRICT(flags)) { - result = TCL_CONVERT_SYNTAX; - } else { - /* PROFILE_REPLACE or PROFILE_TCL8 */ - result = TCL_OK; - dst += Tcl_UniCharToUtf(UNICODE_REPLACE_CHAR, dst); - numChars++; - src += bytesLeft; /* Go past truncated code unit */ - } - } + if (PROFILE_STRICT(flags)) { + result = TCL_CONVERT_SYNTAX; + } else { + /* PROFILE_REPLACE or PROFILE_TCL8 */ + result = TCL_OK; + dst += Tcl_UniCharToUtf(UNICODE_REPLACE_CHAR, dst); + numChars++; + src += bytesLeft; /* Go past truncated code unit */ + } + } } *srcReadPtr = src - srcStart; @@ -3096,16 +3087,16 @@ Utf16ToUtfProc( if (dst > dstEnd) { result = TCL_CONVERT_NOSPACE; } else { - if (PROFILE_STRICT(flags)) { - result = TCL_CONVERT_SYNTAX; - } else { - /* PROFILE_REPLACE or PROFILE_TCL8 */ - result = TCL_OK; - dst += Tcl_UniCharToUtf(UNICODE_REPLACE_CHAR, dst); - numChars++; - src++; /* Go past truncated code unit */ - } - } + if (PROFILE_STRICT(flags)) { + result = TCL_CONVERT_SYNTAX; + } else { + /* PROFILE_REPLACE or PROFILE_TCL8 */ + result = TCL_OK; + dst += Tcl_UniCharToUtf(UNICODE_REPLACE_CHAR, dst); + numChars++; + src++; /* Go past truncated code unit */ + } + } } *srcReadPtr = src - srcStart; @@ -3297,8 +3288,8 @@ UtfToUcs2Proc( len = TclUtfToUniChar(src, &ch); if ((ch >= 0xD800) && (len < 3)) { if (PROFILE_STRICT(flags)) { - result = TCL_CONVERT_UNKNOWN; - break; + result = TCL_CONVERT_UNKNOWN; + break; } src += len; src += TclUtfToUniChar(src, &ch); @@ -3308,8 +3299,8 @@ UtfToUcs2Proc( len = TclUtfToUniChar(src, &ch); if (ch > 0xFFFF) { if (PROFILE_STRICT(flags)) { - result = TCL_CONVERT_UNKNOWN; - break; + result = TCL_CONVERT_UNKNOWN; + break; } ch = UNICODE_REPLACE_CHAR; } @@ -4559,8 +4550,7 @@ int TclEncodingSetProfileFlags(int flags) { if (flags & TCL_ENCODING_STOPONERROR) { TCL_ENCODING_PROFILE_SET(flags, TCL_ENCODING_PROFILE_STRICT); - } - else { + } else { int profile = TCL_ENCODING_PROFILE_GET(flags); switch (profile) { case TCL_ENCODING_PROFILE_TCL8: @@ -4594,13 +4584,13 @@ int TclEncodingSetProfileFlags(int flags) void TclGetEncodingProfiles(Tcl_Interp *interp) { - int i, n; + size_t i, n; Tcl_Obj *objPtr; n = sizeof(encodingProfiles) / sizeof(encodingProfiles[0]); objPtr = Tcl_NewListObj(n, NULL); for (i = 0; i < n; ++i) { Tcl_ListObjAppendElement( - interp, objPtr, Tcl_NewStringObj(encodingProfiles[i].name, -1)); + interp, objPtr, Tcl_NewStringObj(encodingProfiles[i].name, TCL_INDEX_NONE)); } Tcl_SetObjResult(interp, objPtr); } -- cgit v0.12 From 0fcb3ce5dcddcc07e9c3f294dc146a4442e9efbb Mon Sep 17 00:00:00 2001 From: pooryorick Date: Tue, 21 Mar 2023 23:49:01 +0000 Subject: Remove unneeded Tcl_IncrRefCount and TclDecrRefCount. TclPtrSetVarIdx takes ownership of newValuePtr if its refCount is 0, and either stores or frees it. --- generic/tclExecute.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/generic/tclExecute.c b/generic/tclExecute.c index 7ee5471..41ce6f0 100644 --- a/generic/tclExecute.c +++ b/generic/tclExecute.c @@ -3585,10 +3585,8 @@ TEBCresume( } } DECACHE_STACK_INFO(); - Tcl_IncrRefCount(valueToAssign); objResultPtr = TclPtrSetVarIdx(interp, varPtr, arrayPtr, part1Ptr, part2Ptr, valueToAssign, TCL_LEAVE_ERR_MSG, opnd); - TclDecrRefCount(valueToAssign); CACHE_STACK_INFO(); if (!objResultPtr) { errorInLappendListPtr: -- cgit v0.12 From ac8ac73e77b6dd4d81b5ff1d2bc9b5d9dd23e576 Mon Sep 17 00:00:00 2001 From: pooryorick Date: Tue, 21 Mar 2023 23:55:12 +0000 Subject: Remove unneeded Tcl_IncrRefCount and TclDecrRefCount. TclPtrSetVarIdx takes ownership of newValuePtr if its refCount is 0, and either stores or frees it. --- generic/tclExecute.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/generic/tclExecute.c b/generic/tclExecute.c index 97122b9..a2578e4 100644 --- a/generic/tclExecute.c +++ b/generic/tclExecute.c @@ -3444,10 +3444,8 @@ TEBCresume( } } DECACHE_STACK_INFO(); - Tcl_IncrRefCount(valueToAssign); objResultPtr = TclPtrSetVarIdx(interp, varPtr, arrayPtr, part1Ptr, part2Ptr, valueToAssign, TCL_LEAVE_ERR_MSG, opnd); - TclDecrRefCount(valueToAssign); CACHE_STACK_INFO(); if (!objResultPtr) { errorInLappendListPtr: -- cgit v0.12 From e7c416995e6a3981e4f32640a86fd24b124f9480 Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Wed, 22 Mar 2023 02:39:10 +0000 Subject: Fix longIs64Bit->has64BitLengths else test will not run on 64-bit Wind --- tests/lseq.test | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tests/lseq.test b/tests/lseq.test index a280069..1dff72d 100644 --- a/tests/lseq.test +++ b/tests/lseq.test @@ -17,7 +17,7 @@ if {"::tcltest" ni [namespace children]} { testConstraint arithSeriesDouble 1 testConstraint arithSeriesShimmer 1 testConstraint arithSeriesShimmerOk 1 -testConstraint longIs64bit [expr {$tcl_platform(wordSize) == 8}] +testConstraint has64BitLengths [expr {$tcl_platform(pointerSize) == 8}] ## Arg errors test lseq-1.1 {error cases} -body { @@ -483,7 +483,7 @@ test lseq-4.3 {TIP examples} { # # Ticket 9933cc4d88697f05976accebd31c1e3ba6efe9c6 - lseq corner case -test lseq-4.4 {lseq corner case} -constraints longIs64bit -body { +test lseq-4.4 {lseq corner case} -constraints has64BitLengths -body { set tcmd { set res {} set s [catch {lindex [lseq 10 100] 0} e] @@ -507,6 +507,9 @@ test lseq-4.4 {lseq corner case} -constraints longIs64bit -body { test lseq-4.5 {lindex off by one} -body { lappend res [eval {lindex [lseq 1 4] end}] lappend res [eval {lindex [lseq 1 4] end-1}] +} -setup { + # Since 4.3 does not clean up and 4.4 may not run under constraint + set res {} } -cleanup { unset res } -result {4 3} -- cgit v0.12 From 133af7524b7bdfc62eb504a932faef09a0ae03e9 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Wed, 22 Mar 2023 08:32:10 +0000 Subject: Since TCL_ENCODING_PROFILE_DEFAULT == TCL_ENCODING_PROFILE_TCL8 (on Tcl 8), we can simplify. --- generic/tcl.h | 5 ----- generic/tclCmdAH.c | 4 ---- generic/tclEncoding.c | 10 +++------- generic/tclIO.c | 4 ++-- 4 files changed, 5 insertions(+), 18 deletions(-) diff --git a/generic/tcl.h b/generic/tcl.h index e66607b..4da5f43 100644 --- a/generic/tcl.h +++ b/generic/tcl.h @@ -2149,12 +2149,7 @@ typedef struct Tcl_EncodingType { (flags_) &= ~TCL_ENCODING_PROFILE_MASK; \ (flags_) |= profile_; \ } while (0) -/* Still being argued - For Tcl9, is the default strict? TODO */ -#if TCL_MAJOR_VERSION < 9 #define TCL_ENCODING_PROFILE_DEFAULT TCL_ENCODING_PROFILE_TCL8 -#else -#define TCL_ENCODING_PROFILE_DEFAULT TCL_ENCODING_PROFILE_TCL8 /* STRICT? REPLACE? TODO */ -#endif /* * The following definitions are the error codes returned by the conversion diff --git a/generic/tclCmdAH.c b/generic/tclCmdAH.c index 6c46c8e..1a1b060 100644 --- a/generic/tclCmdAH.c +++ b/generic/tclCmdAH.c @@ -567,11 +567,7 @@ EncodingConvertParseOptions ( Tcl_Encoding encoding; Tcl_Obj *dataObj; Tcl_Obj *failVarObj; -#if TCL_MAJOR_VERSION > 8 || defined(TCL_NO_DEPRECATED) - int profile = TCL_ENCODING_PROFILE_TCL8; /* TODO - default for Tcl9? */ -#else int profile = TCL_ENCODING_PROFILE_TCL8; -#endif /* * Possible combinations: diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 7c04a61..fc62d7c 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -200,14 +200,10 @@ static struct TclEncodingProfiles { {"tcl8", TCL_ENCODING_PROFILE_TCL8}, }; #define PROFILE_STRICT(flags_) \ - ((TCL_ENCODING_PROFILE_GET(flags_) == TCL_ENCODING_PROFILE_STRICT) \ - || (TCL_ENCODING_PROFILE_GET(flags_) == 0 \ - && TCL_ENCODING_PROFILE_DEFAULT == TCL_ENCODING_PROFILE_STRICT)) + (TCL_ENCODING_PROFILE_GET(flags_) == TCL_ENCODING_PROFILE_STRICT) #define PROFILE_REPLACE(flags_) \ - ((TCL_ENCODING_PROFILE_GET(flags_) == TCL_ENCODING_PROFILE_REPLACE) \ - || (TCL_ENCODING_PROFILE_GET(flags_) == 0 \ - && TCL_ENCODING_PROFILE_DEFAULT == TCL_ENCODING_PROFILE_REPLACE)) + (TCL_ENCODING_PROFILE_GET(flags_) == TCL_ENCODING_PROFILE_REPLACE) #define UNICODE_REPLACE_CHAR ((Tcl_UniChar)0xFFFD) #define SURROGATE(c_) (((c_) & ~0x7FF) == 0xD800) @@ -4559,7 +4555,7 @@ int TclEncodingSetProfileFlags(int flags) break; case 0: /* Unspecified by caller */ default: - TCL_ENCODING_PROFILE_SET(flags, TCL_ENCODING_PROFILE_DEFAULT); + TCL_ENCODING_PROFILE_SET(flags, TCL_ENCODING_PROFILE_TCL8); break; } } diff --git a/generic/tclIO.c b/generic/tclIO.c index 7f74e2e..b574e0d 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -1703,11 +1703,11 @@ Tcl_CreateChannel( statePtr->inputEncodingState = NULL; statePtr->inputEncodingFlags = TCL_ENCODING_START; TCL_ENCODING_PROFILE_SET(statePtr->inputEncodingFlags, - TCL_ENCODING_PROFILE_DEFAULT); + TCL_ENCODING_PROFILE_TCL8); statePtr->outputEncodingState = NULL; statePtr->outputEncodingFlags = TCL_ENCODING_START; TCL_ENCODING_PROFILE_SET(statePtr->outputEncodingFlags, - TCL_ENCODING_PROFILE_DEFAULT); + TCL_ENCODING_PROFILE_TCL8); /* * Set the channel up initially in AUTO input translation mode to accept -- cgit v0.12 From 44357a25341fc6b531fda7e2d69f83c05ad7702d Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Wed, 22 Mar 2023 09:41:00 +0000 Subject: code cleanup: use more *SURROGATE() macro's --- generic/tclEncoding.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index fc62d7c..fc9d241 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -2778,7 +2778,7 @@ Utf32ToUtfProc( } else { ch = (src[0] & 0xFF) << 24 | (src[1] & 0xFF) << 16 | (src[2] & 0xFF) << 8 | (src[3] & 0xFF); } - if (((prev & ~0x3FF) == 0xD800) && ((ch & ~0x3FF) != 0xDC00)) { + if (HIGH_SURROGATE(prev) && !LOW_SURROGATE(ch)) { /* Bug [10c2c17c32]. If Hi surrogate not followed by Lo surrogate, finish 3-byte UTF-8 */ dst += Tcl_UniCharToUtf(-1, dst); } @@ -2805,7 +2805,7 @@ Utf32ToUtfProc( if ((unsigned)ch - 1 < 0x7F) { *dst++ = (ch & 0xFF); } else { - if (((prev & ~0x3FF) != 0xD800) && ((ch & ~0x3FF) == 0xDC00)) { + if (!HIGH_SURROGATE(prev) && LOW_SURROGATE(ch)) { *dst = 0; /* In case of lower surrogate, don't try to combine */ } dst += Tcl_UniCharToUtf(ch, dst); @@ -2813,7 +2813,7 @@ Utf32ToUtfProc( src += 4; } - if ((ch & ~0x3FF) == 0xD800) { + if (HIGH_SURROGATE(ch)) { /* Bug [10c2c17c32]. If Hi surrogate, finish 3-byte UTF-8 */ dst += Tcl_UniCharToUtf(-1, dst); } @@ -3031,7 +3031,7 @@ Utf16ToUtfProc( } else { ch = (src[0] & 0xFF) << 8 | (src[1] & 0xFF); } - if (((prev & ~0x3FF) == 0xD800) && ((ch & ~0x3FF) != 0xDC00)) { + if (HIGH_SURROGATE(prev) && !LOW_SURROGATE(ch)) { if (PROFILE_STRICT(flags)) { result = TCL_CONVERT_SYNTAX; src -= 2; /* Go back to beginning of high surrogate */ @@ -3050,9 +3050,9 @@ Utf16ToUtfProc( if ((unsigned)ch - 1 < 0x7F) { *dst++ = (ch & 0xFF); - } else if (((prev & ~0x3FF) == 0xD800) || ((ch & ~0x3FF) == 0xD800)) { + } else if (HIGH_SURROGATE(prev) || HIGH_SURROGATE(ch)) { dst += Tcl_UniCharToUtf(ch, dst); - } else if (((ch & ~0x3FF) == 0xDC00) && PROFILE_STRICT(flags)) { + } else if (LOW_SURROGATE(ch) && PROFILE_STRICT(flags)) { /* Lo surrogate not preceded by Hi surrogate */ result = TCL_CONVERT_SYNTAX; break; @@ -3063,7 +3063,7 @@ Utf16ToUtfProc( src += sizeof(unsigned short); } - if ((ch & ~0x3FF) == 0xD800) { + if (HIGH_SURROGATE(ch)) { if (PROFILE_STRICT(flags)) { result = TCL_CONVERT_SYNTAX; src -= 2; @@ -3301,7 +3301,7 @@ UtfToUcs2Proc( ch = UNICODE_REPLACE_CHAR; } #endif - if (PROFILE_STRICT(flags) && ((ch & ~0x7FF) == 0xD800)) { + if (PROFILE_STRICT(flags) && SURROGATE(ch)) { result = TCL_CONVERT_SYNTAX; break; } -- cgit v0.12 From f1fafe7c16c654a9f7f65644db877071980b8a5d Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Wed, 22 Mar 2023 11:44:33 +0000 Subject: Move (TCL_ENCODING_PROFILE_MASK|GET|SET) from tcl.h to tclIO.h, since those are not public. Some formatting. --- generic/tcl.h | 7 ------- generic/tclCmdAH.c | 5 ++--- generic/tclEncoding.c | 12 ++++++------ generic/tclExecute.c | 6 ++---- generic/tclIO.c | 24 ++++++++++++------------ generic/tclIO.h | 7 +++++++ generic/tclTest.c | 3 +-- generic/tclTestObj.c | 3 +-- generic/tclZlib.c | 6 ++---- win/tclWinConsole.c | 3 +-- 10 files changed, 34 insertions(+), 42 deletions(-) diff --git a/generic/tcl.h b/generic/tcl.h index 4da5f43..9140ec4 100644 --- a/generic/tcl.h +++ b/generic/tcl.h @@ -2142,13 +2142,6 @@ typedef struct Tcl_EncodingType { #define TCL_ENCODING_PROFILE_TCL8 0x01000000 #define TCL_ENCODING_PROFILE_STRICT 0x02000000 #define TCL_ENCODING_PROFILE_REPLACE 0x03000000 -#define TCL_ENCODING_PROFILE_MASK 0xFF000000 -#define TCL_ENCODING_PROFILE_GET(flags_) ((flags_) & TCL_ENCODING_PROFILE_MASK) -#define TCL_ENCODING_PROFILE_SET(flags_, profile_) \ - do { \ - (flags_) &= ~TCL_ENCODING_PROFILE_MASK; \ - (flags_) |= profile_; \ - } while (0) #define TCL_ENCODING_PROFILE_DEFAULT TCL_ENCODING_PROFILE_TCL8 /* diff --git a/generic/tclCmdAH.c b/generic/tclCmdAH.c index 1a1b060..5c27bbc 100644 --- a/generic/tclCmdAH.c +++ b/generic/tclCmdAH.c @@ -675,11 +675,10 @@ EncodingConvertfromObjCmd( * Convert the string into a byte array in 'ds'. */ #if !defined(TCL_NO_DEPRECATED) && (TCL_MAJOR_VERSION < 9) - if (TCL_ENCODING_PROFILE_GET(flags) == TCL_ENCODING_PROFILE_TCL8) { + if (CHANNEL_PROFILE_GET(flags) == TCL_ENCODING_PROFILE_TCL8) { /* Permits high bits to be non-0 in byte array (Tcl 8 style) */ bytesPtr = (char *) Tcl_GetByteArrayFromObj(data, &length); - } - else + } else #endif bytesPtr = (char *) TclGetBytesFromObj(interp, data, &length); diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index fc9d241..b472db3 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -200,10 +200,10 @@ static struct TclEncodingProfiles { {"tcl8", TCL_ENCODING_PROFILE_TCL8}, }; #define PROFILE_STRICT(flags_) \ - (TCL_ENCODING_PROFILE_GET(flags_) == TCL_ENCODING_PROFILE_STRICT) + (CHANNEL_PROFILE_GET(flags_) == TCL_ENCODING_PROFILE_STRICT) #define PROFILE_REPLACE(flags_) \ - (TCL_ENCODING_PROFILE_GET(flags_) == TCL_ENCODING_PROFILE_REPLACE) + (CHANNEL_PROFILE_GET(flags_) == TCL_ENCODING_PROFILE_REPLACE) #define UNICODE_REPLACE_CHAR ((Tcl_UniChar)0xFFFD) #define SURROGATE(c_) (((c_) & ~0x7FF) == 0xD800) @@ -2527,7 +2527,7 @@ UtfToUtfProc( flags |= PTR2INT(clientData); dstEnd = dst + dstLen - ((flags & ENCODING_UTF) ? TCL_UTF_MAX : 6); - profile = TCL_ENCODING_PROFILE_GET(flags); + profile = CHANNEL_PROFILE_GET(flags); for (numChars = 0; src < srcEnd && numChars <= charLimit; numChars++) { if ((src > srcClose) && (!Tcl_UtfCharComplete(src, srcEnd - src))) { @@ -4545,9 +4545,9 @@ TclEncodingProfileIdToName( int TclEncodingSetProfileFlags(int flags) { if (flags & TCL_ENCODING_STOPONERROR) { - TCL_ENCODING_PROFILE_SET(flags, TCL_ENCODING_PROFILE_STRICT); + CHANNEL_PROFILE_SET(flags, TCL_ENCODING_PROFILE_STRICT); } else { - int profile = TCL_ENCODING_PROFILE_GET(flags); + int profile = CHANNEL_PROFILE_GET(flags); switch (profile) { case TCL_ENCODING_PROFILE_TCL8: case TCL_ENCODING_PROFILE_STRICT: @@ -4555,7 +4555,7 @@ int TclEncodingSetProfileFlags(int flags) break; case 0: /* Unspecified by caller */ default: - TCL_ENCODING_PROFILE_SET(flags, TCL_ENCODING_PROFILE_TCL8); + CHANNEL_PROFILE_SET(flags, TCL_ENCODING_PROFILE_TCL8); break; } } diff --git a/generic/tclExecute.c b/generic/tclExecute.c index 41ce6f0..d4e9796 100644 --- a/generic/tclExecute.c +++ b/generic/tclExecute.c @@ -5279,8 +5279,7 @@ TEBCresume( } if (fromIdx == TCL_INDEX_NONE) { fromIdx = 0; - } - else if (fromIdx > length) { + } else if (fromIdx > length) { fromIdx = length; } numToDelete = 0; @@ -5317,8 +5316,7 @@ TEBCresume( } TRACE_APPEND(("\"%.30s\"\n", O2S(objResultPtr))); NEXT_INST_V(6, opnd, 1); - } - else { + } else { if (Tcl_ListObjReplace(interp, valuePtr, fromIdx, diff --git a/generic/tclIO.c b/generic/tclIO.c index b574e0d..6207f6e 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -1702,11 +1702,11 @@ Tcl_CreateChannel( } statePtr->inputEncodingState = NULL; statePtr->inputEncodingFlags = TCL_ENCODING_START; - TCL_ENCODING_PROFILE_SET(statePtr->inputEncodingFlags, + CHANNEL_PROFILE_SET(statePtr->inputEncodingFlags, TCL_ENCODING_PROFILE_TCL8); statePtr->outputEncodingState = NULL; statePtr->outputEncodingFlags = TCL_ENCODING_START; - TCL_ENCODING_PROFILE_SET(statePtr->outputEncodingFlags, + CHANNEL_PROFILE_SET(statePtr->outputEncodingFlags, TCL_ENCODING_PROFILE_TCL8); /* @@ -8060,7 +8060,7 @@ Tcl_GetChannelOption( Tcl_DStringAppendElement(dsPtr, "-profile"); } /* Note currently input and output profiles are same */ - profile = TCL_ENCODING_PROFILE_GET(statePtr->inputEncodingFlags); + profile = CHANNEL_PROFILE_GET(statePtr->inputEncodingFlags); profileName = TclEncodingProfileIdToName(interp, profile); if (profileName == NULL) { return TCL_ERROR; @@ -8266,12 +8266,12 @@ Tcl_SetChannelOption( Tcl_FreeEncoding(statePtr->encoding); statePtr->encoding = encoding; statePtr->inputEncodingState = NULL; - profile = TCL_ENCODING_PROFILE_GET(statePtr->inputEncodingFlags); + profile = CHANNEL_PROFILE_GET(statePtr->inputEncodingFlags); statePtr->inputEncodingFlags = TCL_ENCODING_START; - TCL_ENCODING_PROFILE_SET(statePtr->inputEncodingFlags, profile); + CHANNEL_PROFILE_SET(statePtr->inputEncodingFlags, profile); statePtr->outputEncodingState = NULL; statePtr->outputEncodingFlags = TCL_ENCODING_START; - TCL_ENCODING_PROFILE_SET(statePtr->outputEncodingFlags, profile); /* Same as input */ + CHANNEL_PROFILE_SET(statePtr->outputEncodingFlags, profile); /* Same as input */ ResetFlag(statePtr, CHANNEL_NEED_MORE_DATA|CHANNEL_ENCODING_ERROR); UpdateInterest(chanPtr); return TCL_OK; @@ -8335,8 +8335,8 @@ Tcl_SetChannelOption( if (TclEncodingProfileNameToId(interp, newValue, &profile) != TCL_OK) { return TCL_ERROR; } - TCL_ENCODING_PROFILE_SET(statePtr->inputEncodingFlags, profile); - TCL_ENCODING_PROFILE_SET(statePtr->outputEncodingFlags, profile); + CHANNEL_PROFILE_SET(statePtr->inputEncodingFlags, profile); + CHANNEL_PROFILE_SET(statePtr->outputEncodingFlags, profile); ResetFlag(statePtr, CHANNEL_NEED_MORE_DATA|CHANNEL_ENCODING_ERROR); return TCL_OK; } else if (HaveOpt(1, "-translation")) { @@ -9468,8 +9468,8 @@ TclCopyChannel( && inStatePtr->inputTranslation == TCL_TRANSLATE_LF && outStatePtr->outputTranslation == TCL_TRANSLATE_LF && inStatePtr->encoding == outStatePtr->encoding - && TCL_ENCODING_PROFILE_GET(inStatePtr->flags) != TCL_ENCODING_PROFILE_STRICT - && TCL_ENCODING_PROFILE_GET(outStatePtr->flags) == TCL_ENCODING_PROFILE_TCL8; + && CHANNEL_PROFILE_GET(inStatePtr->flags) != TCL_ENCODING_PROFILE_STRICT + && CHANNEL_PROFILE_GET(outStatePtr->flags) == TCL_ENCODING_PROFILE_TCL8; /* * Allocate a new CopyState to maintain info about the current copy in @@ -9797,8 +9797,8 @@ CopyData( inBinary = (inStatePtr->encoding == NULL); outBinary = (outStatePtr->encoding == NULL); sameEncoding = inStatePtr->encoding == outStatePtr->encoding - && TCL_ENCODING_PROFILE_GET(inStatePtr->flags) != TCL_ENCODING_PROFILE_STRICT - && TCL_ENCODING_PROFILE_GET(outStatePtr->flags) == TCL_ENCODING_PROFILE_TCL8; + && CHANNEL_PROFILE_GET(inStatePtr->flags) != TCL_ENCODING_PROFILE_STRICT + && CHANNEL_PROFILE_GET(outStatePtr->flags) == TCL_ENCODING_PROFILE_TCL8; if (!(inBinary || sameEncoding)) { TclNewObj(bufObj); diff --git a/generic/tclIO.h b/generic/tclIO.h index cdd96ff..5d02569 100644 --- a/generic/tclIO.h +++ b/generic/tclIO.h @@ -286,6 +286,13 @@ typedef struct ChannelState { #define CHANNEL_CLOSEDWRITE (1<<21) /* Channel write side has been closed. * No further Tcl-level write IO on * the channel is allowed. */ +#define CHANNEL_PROFILE_MASK 0xFF000000 +#define CHANNEL_PROFILE_GET(flags_) ((flags_) & CHANNEL_PROFILE_MASK) +#define CHANNEL_PROFILE_SET(flags_, profile_) \ + do { \ + (flags_) &= ~CHANNEL_PROFILE_MASK; \ + (flags_) |= profile_; \ + } while (0) /* * The length of time to wait between synthetic timer events. Must be zero or diff --git a/generic/tclTest.c b/generic/tclTest.c index f68029a..442260b 100644 --- a/generic/tclTest.c +++ b/generic/tclTest.c @@ -2103,8 +2103,7 @@ static int UtfExtWrapper( int flag; if (Tcl_GetIntFromObj(NULL, flagObjs[i], &flag) == TCL_OK) { flags |= flag; - } - else { + } else { int idx; if (Tcl_GetIndexFromObjStruct(interp, flagObjs[i], diff --git a/generic/tclTestObj.c b/generic/tclTestObj.c index 66657d9..b4c6ac3 100644 --- a/generic/tclTestObj.c +++ b/generic/tclTestObj.c @@ -1113,8 +1113,7 @@ TestobjCmd( if (objv[2]->typePtr == NULL) { Tcl_SetObjResult(interp, Tcl_NewStringObj("none", -1)); - } - else { + } else { typeName = objv[2]->typePtr->name; if (!strcmp(typeName, "utf32string")) typeName = "string"; diff --git a/generic/tclZlib.c b/generic/tclZlib.c index 3182c27..5afe1ed 100644 --- a/generic/tclZlib.c +++ b/generic/tclZlib.c @@ -453,8 +453,7 @@ GenerateHeader( if (result == TCL_CONVERT_UNKNOWN) { Tcl_AppendResult( interp, "Comment contains characters > 0xFF", NULL); - } - else { + } else { Tcl_AppendResult(interp, "Comment too large for zip", NULL); } } @@ -489,8 +488,7 @@ GenerateHeader( if (result == TCL_CONVERT_UNKNOWN) { Tcl_AppendResult( interp, "Filename contains characters > 0xFF", NULL); - } - else { + } else { Tcl_AppendResult( interp, "Filename too large for zip", NULL); } diff --git a/win/tclWinConsole.c b/win/tclWinConsole.c index 6688ab1..c93c3e4 100644 --- a/win/tclWinConsole.c +++ b/win/tclWinConsole.c @@ -882,8 +882,7 @@ ConsoleCheckProc( */ handleInfoPtr->flags |= CONSOLE_DATA_AWAITED; WakeConditionVariable(&handleInfoPtr->consoleThreadCV); - } - else if (chanInfoPtr->watchMask & TCL_WRITABLE) { + } else if (chanInfoPtr->watchMask & TCL_WRITABLE) { if (RingBufferHasFreeSpace(&handleInfoPtr->buffer)) { needEvent = 1; /* Output space available */ } -- cgit v0.12 From 636a6d0ea9adb390d44601c064d8e9e134d83583 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Wed, 22 Mar 2023 16:36:22 +0000 Subject: Proposed fix for [0265750233]: invalid read in cmdAH-4.3.13.C1.solo.utf-8.tcl8. --- generic/tclUtf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/generic/tclUtf.c b/generic/tclUtf.c index cb8bb3e..f0135e4 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -464,7 +464,7 @@ Tcl_UtfToUniChar( } return 1; } else if (byte < 0xE0) { - if ((src[1] & 0xC0) == 0x80) { + if ((byte != 0xC1) && (src[1] & 0xC0) == 0x80) { /* * Two-byte-character lead-byte followed by a trail-byte. */ -- cgit v0.12 From 6fec8f2b6ceb11f6c1cfe52126e45005b4376d98 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Wed, 22 Mar 2023 18:13:29 +0000 Subject: Let's get in the 'readability' changes from the 'unchained' branch, without the need for all those partial merges. --- generic/tclUtf.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 1a6ab68..b153dc9 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -182,8 +182,8 @@ Invalid( * * Tcl_UniCharToUtf -- * - * Stores the given Tcl_UniChar as a sequence of UTF-8 bytes in the - * provided buffer. Equivalent to Plan 9 runetochar(). + * Stores the given Tcl_UniChar as a sequence of UTF-8 bytes in the provided + * buffer. Equivalent to Plan 9 runetochar(). * * Surrogate pairs are handled as follows: When ch is a high surrogate, * the first byte of the 4-byte UTF-8 sequence is stored in the buffer and @@ -191,10 +191,9 @@ Invalid( * surrogate and the same buffer, the remaining 3 bytes of the 4-byte * UTF-8 sequence are produced. * - * If no low surrogate follows the high surrogate (which is actually - * illegal), this can be handled reasonably by calling Tcl_UniCharToUtf - * again with ch = -1. This produces a 3-byte UTF-8 sequence - * representing the high surrogate. + * If no low surrogate follows the high surrogate (which is actually illegal), + * calling Tcl_UniCharToUtf again with ch = -1 produces a 3-byte UTF-8 + * sequence representing the high surrogate. * * Results: * Returns the number of bytes stored into the buffer. @@ -208,12 +207,11 @@ Invalid( #undef Tcl_UniCharToUtf size_t Tcl_UniCharToUtf( - int ch, /* The Tcl_UniChar to be stored in the + int ch, /* The Tcl_UniChar to be stored in the * buffer. Can be or'ed with flag TCL_COMBINE */ - char *buf) /* Buffer in which the UTF-8 representation of - * the Tcl_UniChar is stored. Buffer must be - * large enough to hold the UTF-8 character - * (at most 4 bytes). */ + char *buf) /* Buffer in which the UTF-8 representation of + * the ch is stored. Must be large enough to hold the UTF-8 + * character (at most 4 bytes). */ { #if TCL_UTF_MAX > 3 int flags = ch; -- cgit v0.12 From f4a64c89a2d0d854544f1a2bba43bca04c8268ea Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Wed, 22 Mar 2023 19:30:02 +0000 Subject: Forgot one line in previous commit, and indenting --- generic/tclUtf.c | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/generic/tclUtf.c b/generic/tclUtf.c index b153dc9..1fb8847 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -192,7 +192,7 @@ Invalid( * UTF-8 sequence are produced. * * If no low surrogate follows the high surrogate (which is actually illegal), - * calling Tcl_UniCharToUtf again with ch = -1 produces a 3-byte UTF-8 + * calling Tcl_UniCharToUtf again with ch being -1 produces a 3-byte UTF-8 * sequence representing the high surrogate. * * Results: @@ -207,11 +207,13 @@ Invalid( #undef Tcl_UniCharToUtf size_t Tcl_UniCharToUtf( - int ch, /* The Tcl_UniChar to be stored in the - * buffer. Can be or'ed with flag TCL_COMBINE */ - char *buf) /* Buffer in which the UTF-8 representation of - * the ch is stored. Must be large enough to hold the UTF-8 - * character (at most 4 bytes). */ + int ch, /* The Tcl_UniChar to be stored in the + * buffer. Can be or'ed with flag TCL_COMBINE + */ + char *buf) /* Buffer in which the UTF-8 representation of + * ch is stored. Must be large enough to hold the UTF-8 + * character (at most 4 bytes). + */ { #if TCL_UTF_MAX > 3 int flags = ch; @@ -248,7 +250,12 @@ Tcl_UniCharToUtf( /* Previous Tcl_UniChar was not a high surrogate, so just output */ } else { /* High surrogate */ + + /* Add 0x10000 to the raw number encoded in the surrogate + * pair in order to get the code point. + */ ch += 0x40; + /* Fill buffer with specific 3-byte (invalid) byte combination, so following low surrogate can recognize it and combine */ buf[2] = (char) ((ch << 4) & 0x30); -- cgit v0.12 From 17937238e2e0cd2560c5fdaa676ce36b64ab450e Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Wed, 22 Mar 2023 20:09:09 +0000 Subject: One missing int -> Tcl_Size change --- generic/tcl.decls | 2 +- generic/tclPlatDecls.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/generic/tcl.decls b/generic/tcl.decls index 51d9ee5..7f7fafb 100644 --- a/generic/tcl.decls +++ b/generic/tcl.decls @@ -2610,7 +2610,7 @@ declare 0 macosx { declare 1 macosx { int Tcl_MacOSXOpenVersionedBundleResources(Tcl_Interp *interp, const char *bundleName, const char *bundleVersion, - int hasResourceFile, int maxPathLen, char *libraryPath) + int hasResourceFile, Tcl_Size maxPathLen, char *libraryPath) } declare 2 macosx { void Tcl_MacOSXNotifierAddRunLoopMode(const void *runLoopMode) diff --git a/generic/tclPlatDecls.h b/generic/tclPlatDecls.h index f2bc0da..659c3e6 100644 --- a/generic/tclPlatDecls.h +++ b/generic/tclPlatDecls.h @@ -78,7 +78,7 @@ EXTERN int Tcl_MacOSXOpenBundleResources(Tcl_Interp *interp, EXTERN int Tcl_MacOSXOpenVersionedBundleResources( Tcl_Interp *interp, const char *bundleName, const char *bundleVersion, - int hasResourceFile, int maxPathLen, + int hasResourceFile, Tcl_Size maxPathLen, char *libraryPath); /* 2 */ EXTERN void Tcl_MacOSXNotifierAddRunLoopMode( @@ -97,7 +97,7 @@ typedef struct TclPlatStubs { #endif /* WIN */ #ifdef MAC_OSX_TCL /* MACOSX */ int (*tcl_MacOSXOpenBundleResources) (Tcl_Interp *interp, const char *bundleName, int hasResourceFile, int maxPathLen, char *libraryPath); /* 0 */ - int (*tcl_MacOSXOpenVersionedBundleResources) (Tcl_Interp *interp, const char *bundleName, const char *bundleVersion, int hasResourceFile, int maxPathLen, char *libraryPath); /* 1 */ + int (*tcl_MacOSXOpenVersionedBundleResources) (Tcl_Interp *interp, const char *bundleName, const char *bundleVersion, int hasResourceFile, Tcl_Size maxPathLen, char *libraryPath); /* 1 */ void (*tcl_MacOSXNotifierAddRunLoopMode) (const void *runLoopMode); /* 2 */ #endif /* MACOSX */ } TclPlatStubs; -- cgit v0.12 From 7f3849495af70609d977a45ab829c6bc9291b965 Mon Sep 17 00:00:00 2001 From: pooryorick Date: Wed, 22 Mar 2023 20:15:47 +0000 Subject: Merge core-8-branch 7fde86610c: New script used in the "valgrind_each" target in Makefile.in. --- tools/valgrind_check_success | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 tools/valgrind_check_success diff --git a/tools/valgrind_check_success b/tools/valgrind_check_success new file mode 100644 index 0000000..24830d5 --- /dev/null +++ b/tools/valgrind_check_success @@ -0,0 +1,30 @@ +#! /usr/bin/env tclsh + + +proc main {sourcetype source} { + switch $sourcetype { + file { + set chan [open $source] + try { + set data [read $chan] + } finally { + close $chan + } + } + string { + set data $source + } + default { + error [list {wrong # args}] + } + } + set found [regexp -inline -all {blocks are\ + (?:(?:(?:definitely|indirectly|possibly) lost)|still reachable)} $data] + if {[llength $found]} { + puts 0 + } else { + puts 1 + } + flush stdout +} +main {*}$argv -- cgit v0.12 From 9d432523a6f04ed087547fa81ea1c502314bcd37 Mon Sep 17 00:00:00 2001 From: pooryorick Date: Wed, 22 Mar 2023 20:26:05 +0000 Subject: Merge core-8-branch 3082cb9e80: Make valgrind_foreach target in Makefile.in properly handle interrupted tests. --- unix/Makefile.in | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/unix/Makefile.in b/unix/Makefile.in index 93c3abd..1bf5814 100644 --- a/unix/Makefile.in +++ b/unix/Makefile.in @@ -956,6 +956,27 @@ valgrind: ${TCL_EXE} ${TCLTEST_EXE} $(TOP_DIR)/tests/all.tcl -singleproc 1 -constraints valgrind \ $(TESTFLAGS) +testresults/valgrind/%.result: ${TCL_EXE} ${TCLTEST_EXE} + @mkdir -p testresults/valgrind + $(SHELL_ENV) $(VALGRIND) $(VALGRINDARGS) ./${TCLTEST_EXE} \ + $(TOP_DIR)/tests/all.tcl -singleproc 1 -constraints valgrind \ + -file $(basename $(notdir $@)) > $@.tmp 2>&1 + @mv $@.tmp $@ +.PRECIOUS: testresults/valgrind/%.result + + +testresults/valgrind/%.success: testresults/valgrind/%.result + @printf '%s' valgrind >&2 + @printf ' %s' $(basename $(notdir $@)) >&2 + @printf '\n >&2' + @status=$$(./${TCLTEST_EXE} $(TOP_DIR)/tools/valgrind_check_success \ + file $(basename $@).result); \ + if [ "$$status" -eq 1 ]; then touch $@; exit 0; else exit 1; fi + +valgrind_each: $(addprefix testresults/valgrind/,$(addsuffix .success,$(notdir\ + $(wildcard $(TOP_DIR)/tests/*.test)))) + + valgrindshell: ${TCL_EXE} $(SHELL_ENV) $(VALGRIND) $(VALGRINDARGS) ./${TCL_EXE} $(SCRIPT) -- cgit v0.12 From 4dcd70c967a3cecc535854cfe982c8180ffdf30a Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Wed, 22 Mar 2023 21:11:12 +0000 Subject: Remove knownProfileBug constraint: this is already fixed --- tests/io.test | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/tests/io.test b/tests/io.test index 58d276b..a085976 100644 --- a/tests/io.test +++ b/tests/io.test @@ -5677,10 +5677,7 @@ test io-39.15 {Tcl_SetChannelOption: -encoding, binary & utf-8} { close $f set x } 牦 -# Remove knownProfileBug constraint below post TIP656- TODO -test io-39.16 {Tcl_SetChannelOption: -encoding (shortened to "-en"), errors} -constraints { - knownProfileBug -} -body { +test io-39.16 {Tcl_SetChannelOption: -encoding (shortened to "-en"), errors} -body { file delete $path(test1) set f [open $path(test1) w] fconfigure $f -en foobar -- cgit v0.12 From 5e0c34678e24c5ffe05c8a04f4395416067cefc1 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Thu, 23 Mar 2023 15:48:54 +0000 Subject: Resolve C warnings on 32-bit platforms. More code cleanup. --- generic/tclTest.c | 237 +++++++++++++++++++++++++++--------------------------- 1 file changed, 119 insertions(+), 118 deletions(-) diff --git a/generic/tclTest.c b/generic/tclTest.c index c2b7144..2b4b24f 100644 --- a/generic/tclTest.c +++ b/generic/tclTest.c @@ -9,7 +9,7 @@ * Copyright (c) 1993-1994 The Regents of the University of California. * Copyright (c) 1994-1997 Sun Microsystems, Inc. * Copyright (c) 1998-2000 Ajuba Solutions. - * Copyright (c) 2003 by Kevin B. Kenny. All rights reserved. + * Copyright (c) 2003 Kevin B. Kenny. All rights reserved. * * See the file "license.terms" for information on usage and redistribution of * this file, and for a DISCLAIMER OF ALL WARRANTIES. @@ -569,9 +569,9 @@ Tcltest_Init( Tcl_CreateCommand(interp, "testsetnoerr", TestsetCmd, NULL, NULL); Tcl_CreateCommand(interp, "testseterr", TestsetCmd, - (ClientData) TCL_LEAVE_ERR_MSG, NULL); + INT2PTR(TCL_LEAVE_ERR_MSG), NULL); Tcl_CreateCommand(interp, "testset2", Testset2Cmd, - (ClientData) TCL_LEAVE_ERR_MSG, NULL); + INT2PTR(TCL_LEAVE_ERR_MSG), NULL); Tcl_CreateCommand(interp, "testseterrorcode", TestseterrorcodeCmd, NULL, NULL); Tcl_CreateObjCommand(interp, "testsetobjerrorcode", @@ -1212,11 +1212,11 @@ TestcmdtraceCmd( deleteCalled = 0; cmdTrace = Tcl_CreateObjTrace(interp, 50000, TCL_ALLOW_INLINE_COMPILATION, ObjTraceProc, - (ClientData) &deleteCalled, ObjTraceDeleteProc); + &deleteCalled, ObjTraceDeleteProc); result = Tcl_Eval(interp, argv[2]); Tcl_DeleteTrace(interp, cmdTrace); if (!deleteCalled) { - Tcl_SetResult(interp, "Delete wasn't called", TCL_STATIC); + Tcl_AppendResult(interp, "Delete wasn't called", NULL); return TCL_ERROR; } else { return result; @@ -1456,10 +1456,10 @@ TestdcallCmd( } if (id < 0) { Tcl_DontCallWhenDeleted(delInterp, DelCallbackProc, - (ClientData) INT2PTR(-id)); + INT2PTR(-id)); } else { Tcl_CallWhenDeleted(delInterp, DelCallbackProc, - (ClientData) INT2PTR(id)); + INT2PTR(id)); } } Tcl_DeleteInterp(delInterp); @@ -1514,7 +1514,7 @@ TestdelCmd( Tcl_Interp *child; if (argc != 4) { - Tcl_SetResult(interp, "wrong # args", TCL_STATIC); + Tcl_AppendResult(interp, "wrong # args", NULL); return TCL_ERROR; } @@ -1528,7 +1528,7 @@ TestdelCmd( dPtr->deleteCmd = (char *)ckalloc(strlen(argv[3]) + 1); strcpy(dPtr->deleteCmd, argv[3]); - Tcl_CreateCommand(child, argv[2], DelCmdProc, (ClientData) dPtr, + Tcl_CreateCommand(child, argv[2], DelCmdProc, dPtr, DelDeleteProc); return TCL_OK; } @@ -1616,14 +1616,11 @@ TestdelassocdataCmd( */ static int -TestdoubledigitsObjCmd(ClientData unused, - /* NULL */ - Tcl_Interp* interp, - /* Tcl interpreter */ - int objc, - /* Parameter count */ - Tcl_Obj* const objv[]) - /* Parameter vector */ +TestdoubledigitsObjCmd( + ClientData unused, /* NULL */ + Tcl_Interp* interp, /* Tcl interpreter */ + int objc, /* Parameter count */ + Tcl_Obj* const objv[]) /* Parameter vector */ { static const char *options[] = { "shortest", @@ -1646,7 +1643,7 @@ TestdoubledigitsObjCmd(ClientData unused, int type; int decpt; int signum; - char * str; + char *str; char *endPtr; Tcl_Obj* strObj; Tcl_Obj* retval; @@ -1718,7 +1715,7 @@ TestdstringCmd( if (argc < 2) { wrongNumArgs: - Tcl_SetResult(interp, "wrong # args", TCL_STATIC); + Tcl_AppendResult(interp, "wrong # args", NULL); return TCL_ERROR; } if (strcmp(argv[1], "append") == 0) { @@ -1754,11 +1751,11 @@ TestdstringCmd( goto wrongNumArgs; } if (strcmp(argv[2], "staticsmall") == 0) { - Tcl_SetResult(interp, "short", TCL_STATIC); + Tcl_AppendResult(interp, "short", NULL); } else if (strcmp(argv[2], "staticlarge") == 0) { - Tcl_SetResult(interp, "first0 first1 first2 first3 first4 first5 first6 first7 first8 first9\nsecond0 second1 second2 second3 second4 second5 second6 second7 second8 second9\nthird0 third1 third2 third3 third4 third5 third6 third7 third8 third9\nfourth0 fourth1 fourth2 fourth3 fourth4 fourth5 fourth6 fourth7 fourth8 fourth9\nfifth0 fifth1 fifth2 fifth3 fifth4 fifth5 fifth6 fifth7 fifth8 fifth9\nsixth0 sixth1 sixth2 sixth3 sixth4 sixth5 sixth6 sixth7 sixth8 sixth9\nseventh0 seventh1 seventh2 seventh3 seventh4 seventh5 seventh6 seventh7 seventh8 seventh9\n", TCL_STATIC); + Tcl_AppendResult(interp, "first0 first1 first2 first3 first4 first5 first6 first7 first8 first9\nsecond0 second1 second2 second3 second4 second5 second6 second7 second8 second9\nthird0 third1 third2 third3 third4 third5 third6 third7 third8 third9\nfourth0 fourth1 fourth2 fourth3 fourth4 fourth5 fourth6 fourth7 fourth8 fourth9\nfifth0 fifth1 fifth2 fifth3 fifth4 fifth5 fifth6 fifth7 fifth8 fifth9\nsixth0 sixth1 sixth2 sixth3 sixth4 sixth5 sixth6 sixth7 sixth8 sixth9\nseventh0 seventh1 seventh2 seventh3 seventh4 seventh5 seventh6 seventh7 seventh8 seventh9\n", NULL); } else if (strcmp(argv[2], "free") == 0) { - char *s = ckalloc(100); + char *s = (char *)ckalloc(100); strcpy(s, "This is a malloc-ed string"); Tcl_SetResult(interp, s, TCL_DYNAMIC); } else if (strcmp(argv[2], "special") == 0) { @@ -1810,9 +1807,9 @@ TestdstringCmd( * Tcl_DStringGetResult handles freeProc's other than free. */ -static void SpecialFree(blockPtr) - char *blockPtr; /* Block to free. */ -{ +static void SpecialFree( + char *blockPtr /* Block to free. */ +) { ckfree(blockPtr - 16); } @@ -1859,8 +1856,8 @@ static int UtfExtWrapper( Tcl_Encoding encoding; Tcl_EncodingState encState, *encStatePtr; int srcLen, bufLen; - const char *bytes; - char *bufPtr; + const unsigned char *bytes; + unsigned char *bufPtr; int srcRead, dstLen, dstWrote, dstChars; Tcl_Obj *srcReadVar, *dstWroteVar, *dstCharsVar; int result; @@ -1901,8 +1898,7 @@ static int UtfExtWrapper( int flag; if (Tcl_GetIntFromObj(NULL, flagObjs[i], &flag) == TCL_OK) { flags |= flag; - } - else { + } else { int idx; if (Tcl_GetIndexFromObjStruct(interp, flagObjs[i], @@ -1920,13 +1916,14 @@ static int UtfExtWrapper( /* Assumes state is integer if not "" */ Tcl_WideInt wide; if (Tcl_GetWideIntFromObj(interp, objv[5], &wide) == TCL_OK) { - encState = (Tcl_EncodingState) wide; + encState = (Tcl_EncodingState)(size_t)wide; encStatePtr = &encState; } else if (Tcl_GetCharLength(objv[5]) == 0) { encStatePtr = NULL; } else { return TCL_ERROR; } + if (Tcl_GetIntFromObj(interp, objv[6], &dstLen) != TCL_OK) { return TCL_ERROR; } @@ -1969,12 +1966,12 @@ static int UtfExtWrapper( } bufLen = dstLen + 4; /* 4 -> overflow detection */ - bufPtr = ckalloc(bufLen); + bufPtr = (unsigned char *) ckalloc(bufLen); memset(bufPtr, 0xFF, dstLen); /* Need to check nul terminator */ memmove(bufPtr + dstLen, "\xAB\xCD\xEF\xAB", 4); /* overflow detection */ - bytes = (char *) Tcl_GetByteArrayFromObj(objv[3], &srcLen); /* Last! to avoid shimmering */ - result = (*transformer)(interp, encoding, bytes, srcLen, flags, - encStatePtr, bufPtr, dstLen, + bytes = Tcl_GetByteArrayFromObj(objv[3], &srcLen); /* Last! to avoid shimmering */ + result = (*transformer)(interp, encoding, (const char *)bytes, srcLen, flags, + encStatePtr, (char *) bufPtr, dstLen, srcReadVar ? &srcRead : NULL, &dstWrote, dstCharsVar ? &dstChars : NULL); @@ -2007,8 +2004,8 @@ static int UtfExtWrapper( } result = TCL_OK; resultObjs[1] = - encStatePtr ? Tcl_NewWideIntObj((Tcl_WideInt)encState) : Tcl_NewObj(); - resultObjs[2] = Tcl_NewByteArrayObj((unsigned char *)bufPtr, dstLen); + encStatePtr ? Tcl_NewWideIntObj((Tcl_WideInt)(size_t)encState) : Tcl_NewObj(); + resultObjs[2] = Tcl_NewByteArrayObj(bufPtr, dstLen); if (srcReadVar) { if (Tcl_ObjSetVar2(interp, srcReadVar, @@ -2079,6 +2076,11 @@ TestencodingObjCmd( ENC_CREATE, ENC_DELETE, ENC_EXTTOUTF, ENC_UTFTOEXT }; + if (objc < 2) { + Tcl_WrongNumArgs(interp, 1, objv, "command ?args?"); + return TCL_ERROR; + } + if (Tcl_GetIndexFromObj(interp, objv[1], optionStrings, "option", 0, &index) != TCL_OK) { return TCL_ERROR; @@ -2089,6 +2091,7 @@ TestencodingObjCmd( Tcl_EncodingType type; if (objc != 5) { + Tcl_WrongNumArgs(interp, 2, objv, "name toutfcmd fromutfcmd"); return TCL_ERROR; } encodingPtr = (TclEncoding *)ckalloc(sizeof(TclEncoding)); @@ -2108,7 +2111,7 @@ TestencodingObjCmd( type.toUtfProc = EncodingToUtfProc; type.fromUtfProc = EncodingFromUtfProc; type.freeProc = EncodingFreeProc; - type.clientData = (ClientData) encodingPtr; + type.clientData = encodingPtr; type.nullSize = 1; Tcl_CreateEncoding(&type); @@ -2118,9 +2121,11 @@ TestencodingObjCmd( if (objc != 3) { return TCL_ERROR; } - encoding = Tcl_GetEncoding(NULL, Tcl_GetString(objv[2])); - Tcl_FreeEncoding(encoding); - Tcl_FreeEncoding(encoding); + if (TCL_OK != Tcl_GetEncodingFromObj(interp, objv[2], &encoding)) { + return TCL_ERROR; + } + Tcl_FreeEncoding(encoding); /* Free returned reference */ + Tcl_FreeEncoding(encoding); /* Free to match CREATE */ break; case ENC_EXTTOUTF: return UtfExtWrapper(interp,Tcl_ExternalToUtf,objc,objv); @@ -2147,7 +2152,7 @@ EncodingToUtfProc( TclEncoding *encodingPtr; encodingPtr = (TclEncoding *) clientData; - Tcl_EvalEx(encodingPtr->interp,encodingPtr->toUtfCmd,-1,TCL_EVAL_GLOBAL); + Tcl_EvalEx(encodingPtr->interp, encodingPtr->toUtfCmd, -1, TCL_EVAL_GLOBAL); len = strlen(Tcl_GetStringResult(encodingPtr->interp)); if (len > dstLen) { @@ -2179,7 +2184,7 @@ EncodingFromUtfProc( TclEncoding *encodingPtr; encodingPtr = (TclEncoding *) clientData; - Tcl_EvalEx(encodingPtr->interp, encodingPtr->fromUtfCmd,-1,TCL_EVAL_GLOBAL); + Tcl_EvalEx(encodingPtr->interp, encodingPtr->fromUtfCmd, -1, TCL_EVAL_GLOBAL); len = strlen(Tcl_GetStringResult(encodingPtr->interp)); if (len > dstLen) { @@ -2514,10 +2519,10 @@ TestexithandlerCmd( } if (strcmp(argv[1], "create") == 0) { Tcl_CreateExitHandler((value & 1) ? ExitProcOdd : ExitProcEven, - (ClientData) INT2PTR(value)); + INT2PTR(value)); } else if (strcmp(argv[1], "delete") == 0) { Tcl_DeleteExitHandler((value & 1) ? ExitProcOdd : ExitProcEven, - (ClientData) INT2PTR(value)); + INT2PTR(value)); } else { Tcl_AppendResult(interp, "bad option \"", argv[1], "\": must be create or delete", NULL); @@ -2587,7 +2592,7 @@ TestexprlongCmd( " expression\"", NULL); return TCL_ERROR; } - Tcl_SetResult(interp, "This is a result", TCL_STATIC); + Tcl_AppendResult(interp, "This is a result", NULL); result = Tcl_ExprLong(interp, argv[1], &exprResult); if (result != TCL_OK) { return result; @@ -2629,7 +2634,7 @@ TestexprlongobjCmd( Tcl_WrongNumArgs(interp, 1, objv, "expression"); return TCL_ERROR; } - Tcl_SetResult(interp, "This is a result", TCL_STATIC); + Tcl_AppendResult(interp, "This is a result", NULL); result = Tcl_ExprLongObj(interp, objv[1], &exprResult); if (result != TCL_OK) { return result; @@ -2672,7 +2677,7 @@ TestexprdoubleCmd( " expression\"", NULL); return TCL_ERROR; } - Tcl_SetResult(interp, "This is a result", TCL_STATIC); + Tcl_AppendResult(interp, "This is a result", NULL); result = Tcl_ExprDouble(interp, argv[1], &exprResult); if (result != TCL_OK) { return result; @@ -2715,7 +2720,7 @@ TestexprdoubleobjCmd( Tcl_WrongNumArgs(interp, 1, objv, "expression"); return TCL_ERROR; } - Tcl_SetResult(interp, "This is a result", TCL_STATIC); + Tcl_AppendResult(interp, "This is a result", NULL); result = Tcl_ExprDoubleObj(interp, objv[1], &exprResult); if (result != TCL_OK) { return result; @@ -2969,7 +2974,7 @@ TestlinkCmd( static int intVar = 43; static int boolVar = 4; static double realVar = 1.23; - static Tcl_WideInt wideVar = Tcl_LongAsWide(79); + static Tcl_WideInt wideVar = 79; static char *stringVar = NULL; static char charVar = '@'; static unsigned char ucharVar = 130; @@ -2979,7 +2984,7 @@ TestlinkCmd( static long longVar = 123456789L; static unsigned long ulongVar = 3456789012UL; static float floatVar = 4.5; - static Tcl_WideUInt uwideVar = (Tcl_WideUInt) Tcl_LongAsWide(123); + static Tcl_WideUInt uwideVar = 123; static int created = 0; char buffer[2*TCL_DOUBLE_SPACE]; int writable, flag; @@ -3019,112 +3024,112 @@ TestlinkCmd( if (Tcl_GetBoolean(interp, argv[2], &writable) != TCL_OK) { return TCL_ERROR; } - flag = (writable != 0) ? 0 : TCL_LINK_READ_ONLY; - if (Tcl_LinkVar(interp, "int", (char *) &intVar, + flag = writable ? 0 : TCL_LINK_READ_ONLY; + if (Tcl_LinkVar(interp, "int", (char *)&intVar, TCL_LINK_INT | flag) != TCL_OK) { return TCL_ERROR; } if (Tcl_GetBoolean(interp, argv[3], &writable) != TCL_OK) { return TCL_ERROR; } - flag = (writable != 0) ? 0 : TCL_LINK_READ_ONLY; - if (Tcl_LinkVar(interp, "real", (char *) &realVar, + flag = writable ? 0 : TCL_LINK_READ_ONLY; + if (Tcl_LinkVar(interp, "real", (char *)&realVar, TCL_LINK_DOUBLE | flag) != TCL_OK) { return TCL_ERROR; } if (Tcl_GetBoolean(interp, argv[4], &writable) != TCL_OK) { return TCL_ERROR; } - flag = (writable != 0) ? 0 : TCL_LINK_READ_ONLY; - if (Tcl_LinkVar(interp, "bool", (char *) &boolVar, + flag = writable ? 0 : TCL_LINK_READ_ONLY; + if (Tcl_LinkVar(interp, "bool", (char *)&boolVar, TCL_LINK_BOOLEAN | flag) != TCL_OK) { return TCL_ERROR; } if (Tcl_GetBoolean(interp, argv[5], &writable) != TCL_OK) { return TCL_ERROR; } - flag = (writable != 0) ? 0 : TCL_LINK_READ_ONLY; - if (Tcl_LinkVar(interp, "string", (char *) &stringVar, + flag = writable ? 0 : TCL_LINK_READ_ONLY; + if (Tcl_LinkVar(interp, "string", (char *)&stringVar, TCL_LINK_STRING | flag) != TCL_OK) { return TCL_ERROR; } if (Tcl_GetBoolean(interp, argv[6], &writable) != TCL_OK) { return TCL_ERROR; } - flag = (writable != 0) ? 0 : TCL_LINK_READ_ONLY; - if (Tcl_LinkVar(interp, "wide", (char *) &wideVar, + flag = writable ? 0 : TCL_LINK_READ_ONLY; + if (Tcl_LinkVar(interp, "wide", (char *)&wideVar, TCL_LINK_WIDE_INT | flag) != TCL_OK) { return TCL_ERROR; } if (Tcl_GetBoolean(interp, argv[7], &writable) != TCL_OK) { return TCL_ERROR; } - flag = (writable != 0) ? 0 : TCL_LINK_READ_ONLY; - if (Tcl_LinkVar(interp, "char", (char *) &charVar, + flag = writable ? 0 : TCL_LINK_READ_ONLY; + if (Tcl_LinkVar(interp, "char", (char *)&charVar, TCL_LINK_CHAR | flag) != TCL_OK) { return TCL_ERROR; } if (Tcl_GetBoolean(interp, argv[8], &writable) != TCL_OK) { return TCL_ERROR; } - flag = (writable != 0) ? 0 : TCL_LINK_READ_ONLY; - if (Tcl_LinkVar(interp, "uchar", (char *) &ucharVar, + flag = writable ? 0 : TCL_LINK_READ_ONLY; + if (Tcl_LinkVar(interp, "uchar", (char *)&ucharVar, TCL_LINK_UCHAR | flag) != TCL_OK) { return TCL_ERROR; } if (Tcl_GetBoolean(interp, argv[9], &writable) != TCL_OK) { return TCL_ERROR; } - flag = (writable != 0) ? 0 : TCL_LINK_READ_ONLY; - if (Tcl_LinkVar(interp, "short", (char *) &shortVar, + flag = writable ? 0 : TCL_LINK_READ_ONLY; + if (Tcl_LinkVar(interp, "short", (char *)&shortVar, TCL_LINK_SHORT | flag) != TCL_OK) { return TCL_ERROR; } if (Tcl_GetBoolean(interp, argv[10], &writable) != TCL_OK) { return TCL_ERROR; } - flag = (writable != 0) ? 0 : TCL_LINK_READ_ONLY; - if (Tcl_LinkVar(interp, "ushort", (char *) &ushortVar, + flag = writable ? 0 : TCL_LINK_READ_ONLY; + if (Tcl_LinkVar(interp, "ushort", (char *)&ushortVar, TCL_LINK_USHORT | flag) != TCL_OK) { return TCL_ERROR; } if (Tcl_GetBoolean(interp, argv[11], &writable) != TCL_OK) { return TCL_ERROR; } - flag = (writable != 0) ? 0 : TCL_LINK_READ_ONLY; - if (Tcl_LinkVar(interp, "uint", (char *) &uintVar, + flag = writable ? 0 : TCL_LINK_READ_ONLY; + if (Tcl_LinkVar(interp, "uint", (char *)&uintVar, TCL_LINK_UINT | flag) != TCL_OK) { return TCL_ERROR; } if (Tcl_GetBoolean(interp, argv[12], &writable) != TCL_OK) { return TCL_ERROR; } - flag = (writable != 0) ? 0 : TCL_LINK_READ_ONLY; - if (Tcl_LinkVar(interp, "long", (char *) &longVar, + flag = writable ? 0 : TCL_LINK_READ_ONLY; + if (Tcl_LinkVar(interp, "long", (char *)&longVar, TCL_LINK_LONG | flag) != TCL_OK) { return TCL_ERROR; } if (Tcl_GetBoolean(interp, argv[13], &writable) != TCL_OK) { return TCL_ERROR; } - flag = (writable != 0) ? 0 : TCL_LINK_READ_ONLY; - if (Tcl_LinkVar(interp, "ulong", (char *) &ulongVar, + flag = writable ? 0 : TCL_LINK_READ_ONLY; + if (Tcl_LinkVar(interp, "ulong", (char *)&ulongVar, TCL_LINK_ULONG | flag) != TCL_OK) { return TCL_ERROR; } if (Tcl_GetBoolean(interp, argv[14], &writable) != TCL_OK) { return TCL_ERROR; } - flag = (writable != 0) ? 0 : TCL_LINK_READ_ONLY; - if (Tcl_LinkVar(interp, "float", (char *) &floatVar, + flag = writable ? 0 : TCL_LINK_READ_ONLY; + if (Tcl_LinkVar(interp, "float", (char *)&floatVar, TCL_LINK_FLOAT | flag) != TCL_OK) { return TCL_ERROR; } if (Tcl_GetBoolean(interp, argv[15], &writable) != TCL_OK) { return TCL_ERROR; } - flag = (writable != 0) ? 0 : TCL_LINK_READ_ONLY; - if (Tcl_LinkVar(interp, "uwide", (char *) &uwideVar, + flag = writable ? 0 : TCL_LINK_READ_ONLY; + if (Tcl_LinkVar(interp, "uwide", (char *)&uwideVar, TCL_LINK_WIDE_UINT | flag) != TCL_OK) { return TCL_ERROR; } @@ -3624,6 +3629,7 @@ TestMathFunc2( * *---------------------------------------------------------------------- */ + static void CleanupTestSetassocdataTests( ClientData clientData, /* Data to be released. */ @@ -3977,7 +3983,7 @@ TestregexpObjCmd( "-xflags", "--", NULL }; - enum options { + enum optionsEnum { REGEXP_INDICES, REGEXP_NOCASE, REGEXP_ABOUT, REGEXP_EXPANDED, REGEXP_MULTI, REGEXP_NOCROSS, REGEXP_NEWL, REGEXP_XFLAGS, @@ -4002,7 +4008,7 @@ TestregexpObjCmd( &index) != TCL_OK) { return TCL_ERROR; } - switch ((enum options) index) { + switch ((enum optionsEnum) index) { case REGEXP_INDICES: indices = 1; break; @@ -4325,7 +4331,7 @@ TestsetassocdataCmd( return TCL_ERROR; } - buf = ckalloc(strlen(argv[2]) + 1); + buf = (char *)ckalloc(strlen(argv[2]) + 1); strcpy(buf, argv[2]); /* @@ -4338,8 +4344,7 @@ TestsetassocdataCmd( ckfree(oldData); } - Tcl_SetAssocData(interp, argv[1], CleanupTestSetassocdataTests, - (ClientData) buf); + Tcl_SetAssocData(interp, argv[1], CleanupTestSetassocdataTests, buf); return TCL_OK; } @@ -4562,7 +4567,7 @@ TestseterrorcodeCmd( const char **argv) /* Argument strings. */ { if (argc > 6) { - Tcl_SetResult(interp, "too many args", TCL_STATIC); + Tcl_AppendResult(interp, "too many args", NULL); return TCL_ERROR; } switch (argc) { @@ -4913,10 +4918,10 @@ GetTimesObjCmd( /* alloc 5000 times */ fprintf(stderr, "alloc 5000 6 word items\n"); - objv = (Tcl_Obj **) ckalloc(5000 * sizeof(Tcl_Obj *)); + objv = (Tcl_Obj **)ckalloc(5000 * sizeof(Tcl_Obj *)); Tcl_GetTime(&start); for (i = 0; i < 5000; i++) { - objv[i] = (Tcl_Obj *) ckalloc(sizeof(Tcl_Obj)); + objv[i] = (Tcl_Obj *)ckalloc(sizeof(Tcl_Obj)); } Tcl_GetTime(&stop); timePer = (stop.sec - start.sec)*1000000 + (stop.usec - start.usec); @@ -5299,7 +5304,7 @@ TestsetCmd( const char *value; if (argc == 2) { - Tcl_SetResult(interp, "before get", TCL_STATIC); + Tcl_AppendResult(interp, "before get", NULL); value = Tcl_GetVar2(interp, argv[1], NULL, flags); if (value == NULL) { return TCL_ERROR; @@ -5307,7 +5312,7 @@ TestsetCmd( Tcl_AppendElement(interp, value); return TCL_OK; } else if (argc == 3) { - Tcl_SetResult(interp, "before set", TCL_STATIC); + Tcl_AppendResult(interp, "before set", NULL); value = Tcl_SetVar2(interp, argv[1], NULL, argv[2], flags); if (value == NULL) { return TCL_ERROR; @@ -5331,7 +5336,7 @@ Testset2Cmd( const char *value; if (argc == 3) { - Tcl_SetResult(interp, "before get", TCL_STATIC); + Tcl_AppendResult(interp, "before get", NULL); value = Tcl_GetVar2(interp, argv[1], argv[2], flags); if (value == NULL) { return TCL_ERROR; @@ -5339,7 +5344,7 @@ Testset2Cmd( Tcl_AppendElement(interp, value); return TCL_OK; } else if (argc == 4) { - Tcl_SetResult(interp, "before set", TCL_STATIC); + Tcl_AppendResult(interp, "before set", NULL); value = Tcl_SetVar2(interp, argv[1], argv[2], argv[3], flags); if (value == NULL) { return TCL_ERROR; @@ -5516,7 +5521,7 @@ TestmainthreadCmd( Tcl_SetObjResult(interp, idObj); return TCL_OK; } else { - Tcl_SetResult(interp, "wrong # args", TCL_STATIC); + Tcl_AppendResult(interp, "wrong # args", NULL); return TCL_ERROR; } } @@ -5571,9 +5576,9 @@ TestsetmainloopCmd( int argc, /* Number of arguments. */ const char **argv) /* Argument strings. */ { - exitMainLoop = 0; - Tcl_SetMainLoop(MainLoop); - return TCL_OK; + exitMainLoop = 0; + Tcl_SetMainLoop(MainLoop); + return TCL_OK; } /* @@ -5600,8 +5605,8 @@ TestexitmainloopCmd( int argc, /* Number of arguments. */ const char **argv) /* Argument strings. */ { - exitMainLoop = 1; - return TCL_OK; + exitMainLoop = 1; + return TCL_OK; } /* @@ -5688,7 +5693,7 @@ TestChannelCmd( if ((cmdName[0] == 's') && (strncmp(cmdName, "setchannelerror", len) == 0)) { - Tcl_Obj *msg = Tcl_NewStringObj(argv[3],-1); + Tcl_Obj *msg = Tcl_NewStringObj(argv[3], -1); Tcl_IncrRefCount(msg); Tcl_SetChannelError(chan, msg); @@ -5701,7 +5706,7 @@ TestChannelCmd( } if ((cmdName[0] == 's') && (strncmp(cmdName, "setchannelerrorinterp", len) == 0)) { - Tcl_Obj *msg = Tcl_NewStringObj(argv[3],-1); + Tcl_Obj *msg = Tcl_NewStringObj(argv[3], -1); Tcl_IncrRefCount(msg); Tcl_SetChannelErrorInterp(interp, msg); @@ -6133,8 +6138,7 @@ TestChannelEventCmd( return TCL_ERROR; } - esPtr = (EventScriptRecord *) ckalloc((unsigned) - sizeof(EventScriptRecord)); + esPtr = (EventScriptRecord *)ckalloc(sizeof(EventScriptRecord)); esPtr->nextPtr = statePtr->scriptRecordPtr; statePtr->scriptRecordPtr = esPtr; @@ -6145,7 +6149,7 @@ TestChannelEventCmd( Tcl_IncrRefCount(esPtr->scriptPtr); Tcl_CreateChannelHandler((Tcl_Channel) chanPtr, mask, - TclChannelEventScriptInvoker, (ClientData) esPtr); + TclChannelEventScriptInvoker, esPtr); return TCL_OK; } @@ -6189,7 +6193,7 @@ TestChannelEventCmd( prevEsPtr->nextPtr = esPtr->nextPtr; } Tcl_DeleteChannelHandler((Tcl_Channel) chanPtr, - TclChannelEventScriptInvoker, (ClientData) esPtr); + TclChannelEventScriptInvoker, esPtr); Tcl_DecrRefCount(esPtr->scriptPtr); ckfree(esPtr); @@ -6230,7 +6234,7 @@ TestChannelEventCmd( esPtr = nextEsPtr) { nextEsPtr = esPtr->nextPtr; Tcl_DeleteChannelHandler((Tcl_Channel) chanPtr, - TclChannelEventScriptInvoker, (ClientData) esPtr); + TclChannelEventScriptInvoker, esPtr); Tcl_DecrRefCount(esPtr->scriptPtr); ckfree(esPtr); } @@ -6276,7 +6280,7 @@ TestChannelEventCmd( } esPtr->mask = mask; Tcl_CreateChannelHandler((Tcl_Channel) chanPtr, mask, - TclChannelEventScriptInvoker, (ClientData) esPtr); + TclChannelEventScriptInvoker, esPtr); return TCL_OK; } Tcl_AppendResult(interp, "bad command ", cmd, ", must be one of " @@ -6359,12 +6363,7 @@ TestWrongNumArgsObjCmd( const char *msg; if (objc < 3) { - /* - * Don't use Tcl_WrongNumArgs here, as that is the function - * we want to test! - */ - Tcl_SetResult(interp, "insufficient arguments", TCL_STATIC); - return TCL_ERROR; + goto insufArgs; } if (Tcl_GetIntFromObj(interp, objv[1], &i) != TCL_OK) { @@ -6380,7 +6379,8 @@ TestWrongNumArgsObjCmd( /* * Asked for more arguments than were given. */ - Tcl_SetResult(interp, "insufficient arguments", TCL_STATIC); + insufArgs: + Tcl_AppendResult(interp, "insufficient arguments", NULL); return TCL_ERROR; } @@ -6479,7 +6479,7 @@ TestFilesystemObjCmd( return TCL_ERROR; } if (boolVal) { - res = Tcl_FSRegister((ClientData)interp, &testReportingFilesystem); + res = Tcl_FSRegister(interp, &testReportingFilesystem); msg = (res == TCL_OK) ? "registered" : "failed"; } else { res = Tcl_FSUnregister(&testReportingFilesystem); @@ -6514,7 +6514,7 @@ TestReportInFilesystem( return -1; } lastPathPtr = NULL; - *clientDataPtr = (ClientData) newPathPtr; + *clientDataPtr = newPathPtr; return TCL_OK; } @@ -6850,7 +6850,7 @@ TestSimpleFilesystemObjCmd( return TCL_ERROR; } if (boolVal) { - res = Tcl_FSRegister((ClientData)interp, &simpleFilesystem); + res = Tcl_FSRegister(interp, &simpleFilesystem); msg = (res == TCL_OK) ? "registered" : "failed"; } else { res = Tcl_FSUnregister(&simpleFilesystem); @@ -6883,7 +6883,7 @@ SimpleRedirect( Tcl_IncrRefCount(pathPtr); return pathPtr; } - origPtr = Tcl_NewStringObj(str+10,-1); + origPtr = Tcl_NewStringObj(str+10, -1); Tcl_IncrRefCount(origPtr); return origPtr; } @@ -7282,7 +7282,7 @@ TestHashSystemHashCmd( hPtr = Tcl_CreateHashEntry(&hash, INT2PTR(i), &isNew); if (!isNew) { Tcl_SetObjResult(interp, Tcl_NewIntObj(i)); - Tcl_AppendToObj(Tcl_GetObjResult(interp)," creation problem",-1); + Tcl_AppendToObj(Tcl_GetObjResult(interp)," creation problem", -1); Tcl_DeleteHashTable(&hash); return TCL_ERROR; } @@ -7805,6 +7805,7 @@ InterpCmdResolver( Namespace *callerNsPtr = varFramePtr->nsPtr; Tcl_Command resolvedCmdPtr = NULL; (void)dummy; + (void)flags; /* * Just do something special on a cmd literal "z" in two cases: @@ -7864,7 +7865,7 @@ InterpCmdResolver( */ CallFrame *parentFramePtr = varFramePtr->callerPtr; - char *context = parentFramePtr != NULL ? parentFramePtr->nsPtr->name : "(NULL)"; + const char *context = parentFramePtr != NULL ? parentFramePtr->nsPtr->name : "(NULL)"; if (strcmp(context, "ctx1") == 0 && (name[0] == 'z') && (name[1] == '\0')) { resolvedCmdPtr = Tcl_FindCommand(interp, "y", NULL, TCL_GLOBAL_ONLY); @@ -7986,7 +7987,7 @@ InterpCompiledVarResolver( Tcl_ResolvedVarInfo **rPtr) { if (*name == 'T') { - MyResolvedVarInfo *resVarInfo = ckalloc(sizeof(MyResolvedVarInfo)); + MyResolvedVarInfo *resVarInfo = (MyResolvedVarInfo *)ckalloc(sizeof(MyResolvedVarInfo)); resVarInfo->vInfo.fetchProc = MyCompiledVarFetch; resVarInfo->vInfo.deleteProc = MyCompiledVarFree; -- cgit v0.12