From 8390c51fcaeaa278ec7ec40ec5d31ee187c25208 Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Sat, 25 Feb 2023 07:35:21 +0000 Subject: Fix [1d074b177a]. Failure to read .tclshrc --- unix/tclAppInit.c | 11 ++++++----- win/tclAppInit.c | 7 +++++-- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/unix/tclAppInit.c b/unix/tclAppInit.c index 05d25de..e3d95bc 100644 --- a/unix/tclAppInit.c +++ b/unix/tclAppInit.c @@ -158,15 +158,16 @@ Tcl_AppInit( * is the name of the application. If this line is deleted then no * user-specific startup file will be run under any conditions. */ - #ifdef DJGPP - Tcl_ObjSetVar2(interp, Tcl_NewStringObj("tcl_rcFileName", TCL_INDEX_NONE), NULL, - Tcl_NewStringObj("~/tclsh.rc", TCL_INDEX_NONE), TCL_GLOBAL_ONLY); +#define INITFILENAME "tclshrc.tcl" #else - Tcl_ObjSetVar2(interp, Tcl_NewStringObj("tcl_rcFileName", TCL_INDEX_NONE), NULL, - Tcl_NewStringObj("~/.tclshrc", TCL_INDEX_NONE), TCL_GLOBAL_ONLY); +#define INITFILENAME ".tclshrc" #endif + (void)Tcl_EvalEx(interp, + "set tcl_rcFileName [file tildeexpand ~/" INITFILENAME "]", + -1, + TCL_EVAL_GLOBAL); return TCL_OK; } diff --git a/win/tclAppInit.c b/win/tclAppInit.c index 30127fd..077500a 100644 --- a/win/tclAppInit.c +++ b/win/tclAppInit.c @@ -215,8 +215,11 @@ Tcl_AppInit( * user-specific startup file will be run under any conditions. */ - Tcl_ObjSetVar2(interp, Tcl_NewStringObj("tcl_rcFileName", TCL_INDEX_NONE), NULL, - Tcl_NewStringObj("~/tclshrc.tcl", TCL_INDEX_NONE), TCL_GLOBAL_ONLY); + (void)Tcl_EvalEx(interp, + "set tcl_rcFileName [file tildeexpand ~/tclshrc.tcl]", + -1, + TCL_EVAL_GLOBAL); + return TCL_OK; } -- cgit v0.12 From a826e66f11a2823847cb7788a1d929a9799c95ad Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Sat, 25 Feb 2023 08:58:31 +0000 Subject: Add tests for Bug [46dda6fc29] --- tests/dstring.test | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/tests/dstring.test b/tests/dstring.test index 314cee8..8699a5e 100644 --- a/tests/dstring.test +++ b/tests/dstring.test @@ -211,6 +211,38 @@ test dstring-2.15 {appending list elements} -constraints testdstring -setup { } -cleanup { testdstring free } -result {x #} +test dstring-2.16 {appending list elements - bug [46dda6fc29] segfault} -constraints testdstring -setup { + testdstring free +} -body { + testdstring element "\\\n"; # Will setfault + testdstring get +} -cleanup { + testdstring free +} -result {} +test dstring-2.17 {appending list elements - bug [46dda6fc29] segfault} -constraints testdstring -setup { + testdstring free +} -body { + testdstring element "\\\{"; # Will setfault + testdstring get +} -cleanup { + testdstring free +} -result {} +test dstring-2.18 {appending list elements - bug [46dda6fc29] segfault} -constraints testdstring -setup { + testdstring free +} -body { + testdstring element "\\\}"; # Will setfault + testdstring get +} -cleanup { + testdstring free +} -result {} +test dstring-2.19 {appending list elements - bug [46dda6fc29] segfault} -constraints testdstring -setup { + testdstring free +} -body { + testdstring element "\\\\"; # Will setfault + testdstring get +} -cleanup { + testdstring free +} -result {} test dstring-3.1 {nested sublists} -constraints testdstring -setup { testdstring free -- cgit v0.12 From aa2b48262b02b2f6f23ba7f032f8ea1fb0bddbe3 Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Sat, 25 Feb 2023 09:26:06 +0000 Subject: Fix and tests for [46dda6fc29] --- generic/tclUtil.c | 4 ++-- tests/dstring.test | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/generic/tclUtil.c b/generic/tclUtil.c index e96a564..8f2c16f 100644 --- a/generic/tclUtil.c +++ b/generic/tclUtil.c @@ -1143,13 +1143,13 @@ TclScanElement( */ requireEscape = 1; - length -= (length > 0); + length -= (length+1 > 1); p++; break; } if ((p[1] == '{') || (p[1] == '}') || (p[1] == '\\')) { extra++; /* Escape sequences all one byte longer. */ - length -= (length > 0); + length -= (length+1 > 1); p++; } forbidNone = 1; diff --git a/tests/dstring.test b/tests/dstring.test index 8699a5e..23863d0 100644 --- a/tests/dstring.test +++ b/tests/dstring.test @@ -218,7 +218,7 @@ test dstring-2.16 {appending list elements - bug [46dda6fc29] segfault} -constra testdstring get } -cleanup { testdstring free -} -result {} +} -result \\\\\\n test dstring-2.17 {appending list elements - bug [46dda6fc29] segfault} -constraints testdstring -setup { testdstring free } -body { @@ -226,7 +226,7 @@ test dstring-2.17 {appending list elements - bug [46dda6fc29] segfault} -constra testdstring get } -cleanup { testdstring free -} -result {} +} -result [list [list \{]] test dstring-2.18 {appending list elements - bug [46dda6fc29] segfault} -constraints testdstring -setup { testdstring free } -body { @@ -234,7 +234,7 @@ test dstring-2.18 {appending list elements - bug [46dda6fc29] segfault} -constra testdstring get } -cleanup { testdstring free -} -result {} +} -result [list [list \}]] test dstring-2.19 {appending list elements - bug [46dda6fc29] segfault} -constraints testdstring -setup { testdstring free } -body { @@ -242,7 +242,7 @@ test dstring-2.19 {appending list elements - bug [46dda6fc29] segfault} -constra testdstring get } -cleanup { testdstring free -} -result {} +} -result [list [list \\]] test dstring-3.1 {nested sublists} -constraints testdstring -setup { testdstring free -- cgit v0.12 From 2a5b403768444ddf2d6379ffe3644e9d5b230e19 Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Sat, 25 Feb 2023 16:29:09 +0000 Subject: Experimental fix for [fb368527ae] - length truncation --- generic/tclEncoding.c | 86 +++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 67 insertions(+), 19 deletions(-) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index ce5626f..3a39966 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -1160,8 +1160,8 @@ Tcl_ExternalToUtfDStringEx( char *dst; Tcl_EncodingState state; const Encoding *encodingPtr; - int result, soFar, srcRead, dstWrote, dstChars; - Tcl_Size dstLen; + int result; + Tcl_Size dstLen, soFar; const char *srcStart = src; Tcl_DStringInit(dstPtr); @@ -1179,23 +1179,47 @@ Tcl_ExternalToUtfDStringEx( srcLen = encodingPtr->lengthProc(src); } - flags |= TCL_ENCODING_START | TCL_ENCODING_END; + flags |= TCL_ENCODING_START; if (encodingPtr->toUtfProc == UtfToUtfProc) { flags |= ENCODING_INPUT; } while (1) { - result = encodingPtr->toUtfProc(encodingPtr->clientData, src, srcLen, - flags, &state, dst, dstLen, &srcRead, &dstWrote, &dstChars); - soFar = dst + dstWrote - Tcl_DStringValue(dstPtr); + int srcChunkLen, srcChunkRead; + int dstChunkLen, dstChunkWrote, dstChunkChars; + + if (srcLen > INT_MAX) { + srcChunkLen = INT_MAX; + } else { + srcChunkLen = srcLen; + flags |= TCL_ENCODING_END; /* Last chunk */ + } + dstChunkLen = dstLen > INT_MAX ? INT_MAX : dstLen; + + result = encodingPtr->toUtfProc(encodingPtr->clientData, src, + srcChunkLen, flags, &state, dst, dstChunkLen, + &srcChunkRead, &dstChunkWrote, &dstChunkChars); + soFar = dst + dstChunkWrote - Tcl_DStringValue(dstPtr); - src += srcRead; - if (result != TCL_CONVERT_NOSPACE) { + src += srcChunkRead; + srcLen -= srcChunkRead; + + /* + * Keep looping in two case - + * - our destination buffer did not have enough room + * - we had not passed in all the data and error indicated fragment + * of a multibyte character + * In both cases we have to grow buffer, move the input source pointer + * and loop. Otherwise, return the result we got. + */ + if ((result != TCL_CONVERT_NOSPACE) && + !(result == TCL_CONVERT_MULTIBYTE && (flags & TCL_ENCODING_END))) { Tcl_DStringSetLength(dstPtr, soFar); return (result == TCL_OK) ? TCL_INDEX_NONE : (Tcl_Size)(src - srcStart); } + flags &= ~TCL_ENCODING_START; - srcLen -= srcRead; + if (Tcl_DStringLength(dstPtr) == 0) { Tcl_DStringSetLength(dstPtr, dstLen); } @@ -1398,9 +1422,9 @@ Tcl_UtfToExternalDStringEx( char *dst; Tcl_EncodingState state; const Encoding *encodingPtr; - int result, soFar, srcRead, dstWrote, dstChars; + int result; + Tcl_Size dstLen, soFar; const char *srcStart = src; - Tcl_Size dstLen; Tcl_DStringInit(dstPtr); dst = Tcl_DStringValue(dstPtr); @@ -1416,16 +1440,40 @@ Tcl_UtfToExternalDStringEx( } else if (srcLen == TCL_INDEX_NONE) { srcLen = strlen(src); } - flags |= TCL_ENCODING_START | TCL_ENCODING_END; + flags |= TCL_ENCODING_START; while (1) { + int srcChunkLen, srcChunkRead; + int dstChunkLen, dstChunkWrote, dstChunkChars; + + if (srcLen > INT_MAX) { + srcChunkLen = INT_MAX; + } else { + srcChunkLen = srcLen; + flags |= TCL_ENCODING_END; /* Last chunk */ + } + dstChunkLen = dstLen > INT_MAX ? INT_MAX : dstLen; + result = encodingPtr->fromUtfProc(encodingPtr->clientData, src, - srcLen, flags, &state, dst, dstLen, - &srcRead, &dstWrote, &dstChars); - soFar = dst + dstWrote - Tcl_DStringValue(dstPtr); + srcChunkLen, flags, &state, dst, dstChunkLen, + &srcChunkRead, &dstChunkWrote, &dstChunkChars); + soFar = dst + dstChunkWrote - Tcl_DStringValue(dstPtr); - src += srcRead; - if (result != TCL_CONVERT_NOSPACE) { - int i = soFar + encodingPtr->nullSize - 1; + /* Move past the part processed in this go around */ + src += srcChunkRead; + srcLen -= srcChunkRead; + + /* + * Keep looping in two case - + * - our destination buffer did not have enough room + * - we had not passed in all the data and error indicated fragment + * of a multibyte character + * In both cases we have to grow buffer, move the input source pointer + * and loop. Otherwise, return the result we got. + */ + if ((result != TCL_CONVERT_NOSPACE) && + !(result == TCL_CONVERT_MULTIBYTE && (flags & TCL_ENCODING_END))) { + size_t i = soFar + encodingPtr->nullSize - 1; + /* Loop as DStringSetLength only stores one nul byte at a time */ while (i >= soFar) { Tcl_DStringSetLength(dstPtr, i--); } @@ -1433,7 +1481,7 @@ Tcl_UtfToExternalDStringEx( } flags &= ~TCL_ENCODING_START; - srcLen -= srcRead; + if (Tcl_DStringLength(dstPtr) == 0) { Tcl_DStringSetLength(dstPtr, dstLen); } -- cgit v0.12 From 7b0b2ebd37ef0e7ea2e38a394ee476fed9829a35 Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Sat, 25 Feb 2023 17:23:42 +0000 Subject: Fix large writes to file. Need to break into INT_MAX size chunks. --- generic/tclIO.c | 39 ++++++++++++++++++++++++++++++++------- 1 file changed, 32 insertions(+), 7 deletions(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index 26d0011..82887d9 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -4231,7 +4231,6 @@ Tcl_WriteObj( Channel *chanPtr; ChannelState *statePtr; /* State info for channel */ const char *src; - size_t srcLen = 0; statePtr = ((Channel *) chan)->state; chanPtr = statePtr->topChanPtr; @@ -4240,19 +4239,45 @@ Tcl_WriteObj( return TCL_INDEX_NONE; } if (statePtr->encoding == NULL) { - size_t result; + size_t srcLen; + size_t totalWritten = 0; src = (char *) Tcl_GetByteArrayFromObj(objPtr, &srcLen); + /* TODO - refactor common code below */ if (src == NULL) { Tcl_SetErrno(EILSEQ); - result = TCL_INDEX_NONE; + totalWritten = TCL_INDEX_NONE; } else { - result = WriteBytes(chanPtr, src, srcLen); - } - return result; + int chunkSize = srcLen > INT_MAX ? INT_MAX : srcLen; + int written; + written = WriteBytes(chanPtr, src, chunkSize); + if (written < 0) { + return TCL_INDEX_NONE; + } + totalWritten += written; + srcLen -= chunkSize; + } while (srcLen); + + return totalWritten; } else { + size_t srcLen; + size_t totalWritten = 0; src = Tcl_GetStringFromObj(objPtr, &srcLen); - return WriteChars(chanPtr, src, srcLen); + /* + * Note original code always called WriteChars even if srcLen 0 + * so we will too. + */ + do { + int chunkSize = srcLen > INT_MAX ? INT_MAX : srcLen; + int written; + written = WriteChars(chanPtr, src, chunkSize); + if (written < 0) { + return TCL_INDEX_NONE; + } + totalWritten += written; + srcLen -= chunkSize; + } while (srcLen); + return totalWritten; } } -- cgit v0.12 From 2fb6b99620807fff819ce6c0c7ac60ae4774fc73 Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Sun, 26 Feb 2023 06:26:01 +0000 Subject: Minor refactor, add tests --- generic/tclIO.c | 59 ++++++++++++++++++++++++--------------------------------- tests/io.test | 29 ++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+), 34 deletions(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index 82887d9..ff0e7fb 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -4238,47 +4238,38 @@ Tcl_WriteObj( if (CheckChannelErrors(statePtr, TCL_WRITABLE) != 0) { return TCL_INDEX_NONE; } - if (statePtr->encoding == NULL) { - size_t srcLen; - size_t totalWritten = 0; + size_t srcLen; + if (statePtr->encoding == NULL) { src = (char *) Tcl_GetByteArrayFromObj(objPtr, &srcLen); - /* TODO - refactor common code below */ if (src == NULL) { Tcl_SetErrno(EILSEQ); - totalWritten = TCL_INDEX_NONE; - } else { - int chunkSize = srcLen > INT_MAX ? INT_MAX : srcLen; - int written; - written = WriteBytes(chanPtr, src, chunkSize); - if (written < 0) { - return TCL_INDEX_NONE; - } - totalWritten += written; - srcLen -= chunkSize; - } while (srcLen); - - return totalWritten; + return TCL_INDEX_NONE; + } } else { - size_t srcLen; - size_t totalWritten = 0; src = Tcl_GetStringFromObj(objPtr, &srcLen); - /* - * Note original code always called WriteChars even if srcLen 0 - * so we will too. - */ - do { - int chunkSize = srcLen > INT_MAX ? INT_MAX : srcLen; - int written; - written = WriteChars(chanPtr, src, chunkSize); - if (written < 0) { - return TCL_INDEX_NONE; - } - totalWritten += written; - srcLen -= chunkSize; - } while (srcLen); - return totalWritten; } + + size_t totalWritten = 0; + /* + * Note original code always called WriteChars even if srcLen 0 + * so we will too. + */ + do { + int chunkSize = srcLen > INT_MAX ? INT_MAX : srcLen; + int written; + if (statePtr->encoding == NULL) { + written = WriteBytes(chanPtr, src, chunkSize); + } else { + written = WriteChars(chanPtr, src, chunkSize); + } + if (written < 0) { + return TCL_INDEX_NONE; + } + totalWritten += written; + srcLen -= chunkSize; + } while (srcLen); + return totalWritten; } static void diff --git a/tests/io.test b/tests/io.test index cb1c691..83735c3 100644 --- a/tests/io.test +++ b/tests/io.test @@ -36,6 +36,7 @@ namespace eval ::tcl::test::io { } source [file join [file dirname [info script]] tcltests.tcl] +testConstraint pointerIs64bit [expr {$::tcl_platform(pointerSize) >= 8}] testConstraint testbytestring [llength [info commands testbytestring]] testConstraint testchannel [llength [info commands testchannel]] testConstraint testfevent [llength [info commands testfevent]] @@ -194,6 +195,20 @@ test io-1.9 {Tcl_WriteChars: WriteChars} { set sizes } {19 19 19 19 19} +test io-1.10 {WriteChars: large file (> INT_MAX). Bug 3d01d51bc4} -constraints { + pointerIs64bit +} -setup { + set tmpfile [file join [temporaryDirectory] io-1.10.tmp] +} -cleanup { + file delete $tmpfile +} -body { + set fd [open $tmpfile w] + puts -nonewline $fd [string repeat A 0x80000000] + close $fd + # TODO - Should really read it back in but large reads are not currently working! + file size $tmpfile +} -result 2147483648 + test io-2.1 {WriteBytes} { # loop until all bytes are written @@ -236,6 +251,20 @@ test io-2.4 {WriteBytes: reset sawLF after each buffer} { lappend x [contents $path(test1)] } [list "abcdefg\nhijklmno" "abcdefg\nhijklmnopqrstuvwxyz"] +test io-2.5 {WriteBytes: large file (> INT_MAX). Bug 3d01d51bc4} -constraints { + pointerIs64bit +} -setup { + set tmpfile [file join [temporaryDirectory] io-2.5.tmp] +} -cleanup { + file delete $tmpfile +} -body { + set fd [open $tmpfile wb] + puts -nonewline $fd [string repeat A 0x80000000] + close $fd + # TODO - Should really read it back in but large reads are not currently working! + file size $tmpfile +} -result 2147483648 + test io-3.1 {WriteChars: compatibility with WriteBytes} { # loop until all bytes are written -- cgit v0.12 From deec081e744286a433ade1f6dad4e8fca0a20705 Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Sun, 26 Feb 2023 07:15:06 +0000 Subject: Also fix [90ff9b7f73] - writes of exactly 4294967295 bytes --- generic/tclIOCmd.c | 6 +++--- tests/io.test | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+), 3 deletions(-) diff --git a/generic/tclIOCmd.c b/generic/tclIOCmd.c index 4ce27bb..9493a67 100644 --- a/generic/tclIOCmd.c +++ b/generic/tclIOCmd.c @@ -106,7 +106,7 @@ Tcl_PutsObjCmd( Tcl_Obj *string; /* String to write. */ Tcl_Obj *chanObjPtr = NULL; /* channel object. */ int newline; /* Add a newline at end? */ - int result; /* Result of puts operation. */ + size_t result; /* Result of puts operation. */ int mode; /* Mode in which channel is opened. */ switch (objc) { @@ -163,12 +163,12 @@ Tcl_PutsObjCmd( TclChannelPreserve(chan); result = Tcl_WriteObj(chan, string); - if (result == -1) { + if (result == (size_t) -1) { goto error; } if (newline != 0) { result = Tcl_WriteChars(chan, "\n", 1); - if (result == -1) { + if (result == (size_t) -1) { goto error; } } diff --git a/tests/io.test b/tests/io.test index 83735c3..20b240f 100644 --- a/tests/io.test +++ b/tests/io.test @@ -208,6 +208,33 @@ test io-1.10 {WriteChars: large file (> INT_MAX). Bug 3d01d51bc4} -constraints { # TODO - Should really read it back in but large reads are not currently working! file size $tmpfile } -result 2147483648 +test io-1.11 {WriteChars: large file (> UINT_MAX). Bug 3d01d51bc4} -constraints { + pointerIs64bit +} -setup { + set tmpfile [file join [temporaryDirectory] io-1.11.tmp] +} -cleanup { + file delete $tmpfile +} -body { + set fd [open $tmpfile w] + puts -nonewline $fd [string repeat A 0x100000000] + close $fd + # TODO - Should really read it back in but large reads are not currently working! + file size $tmpfile +} -result 4294967296 +test io-1.12 {WriteChars: large file (== UINT_MAX). Bug 90ff9b7f73} -constraints { + pointerIs64bit +} -setup { + set tmpfile [file join [temporaryDirectory] io-1.12.tmp] +} -cleanup { + file delete $tmpfile +} -body { + set fd [open $tmpfile w] + # *Exactly* UINT_MAX - separate bug from the general large file tests + puts -nonewline $fd [string repeat A 0xffffffff] + close $fd + # TODO - Should really read it back in but large reads are not currently working! + file size $tmpfile +} -result 4294967295 test io-2.1 {WriteBytes} { # loop until all bytes are written @@ -264,6 +291,33 @@ test io-2.5 {WriteBytes: large file (> INT_MAX). Bug 3d01d51bc4} -constraints { # TODO - Should really read it back in but large reads are not currently working! file size $tmpfile } -result 2147483648 +test io-2.6 {WriteBytes: large file (> UINT_MAX). Bug 3d01d51bc4} -constraints { + pointerIs64bit +} -setup { + set tmpfile [file join [temporaryDirectory] io-2.6.tmp] +} -cleanup { + file delete $tmpfile +} -body { + set fd [open $tmpfile wb] + puts -nonewline $fd [string repeat A 0x100000000] + close $fd + # TODO - Should really read it back in but large reads are not currently working! + file size $tmpfile +} -result 4294967296 +test io-2.7 {WriteBytes: large file (== UINT_MAX). Bug 90ff9b7f73} -constraints { + pointerIs64bit +} -setup { + set tmpfile [file join [temporaryDirectory] io-2.7.tmp] +} -cleanup { + file delete $tmpfile +} -body { + set fd [open $tmpfile wb] + # *Exactly* UINT_MAX - separate bug from the general large file tests + puts -nonewline $fd [string repeat A 0xffffffff] + close $fd + # TODO - Should really read it back in but large reads are not currently working! + file size $tmpfile +} -result 4294967295 test io-3.1 {WriteChars: compatibility with WriteBytes} { # loop until all bytes are written -- cgit v0.12 From 79cbdf745a36be243633e74267ba4dd96e62d8a5 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Sun, 26 Feb 2023 12:27:43 +0000 Subject: (size_t) -1 -> TCL_INDEX_NONE --- generic/tclIOCmd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/generic/tclIOCmd.c b/generic/tclIOCmd.c index 9493a67..197ca32 100644 --- a/generic/tclIOCmd.c +++ b/generic/tclIOCmd.c @@ -163,12 +163,12 @@ Tcl_PutsObjCmd( TclChannelPreserve(chan); result = Tcl_WriteObj(chan, string); - if (result == (size_t) -1) { + if (result == TCL_INDEX_NONE) { goto error; } if (newline != 0) { result = Tcl_WriteChars(chan, "\n", 1); - if (result == (size_t) -1) { + if (result == TCL_INDEX_NONE) { goto error; } } -- cgit v0.12 From 5ffda39949b785859a8ab5b9b4977536dde6f9f2 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Sun, 26 Feb 2023 12:56:18 +0000 Subject: Move the "srcLen -= srcChunkRead;" past the "if ((result != TCL_CONVERT_NOSPACE)..." (where it originally was), since this isn't needed if the loop ends anyway. --- generic/tclEncoding.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 3a39966..a6ecc26 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -1187,7 +1187,7 @@ Tcl_ExternalToUtfDStringEx( while (1) { int srcChunkLen, srcChunkRead; int dstChunkLen, dstChunkWrote, dstChunkChars; - + if (srcLen > INT_MAX) { srcChunkLen = INT_MAX; } else { @@ -1202,7 +1202,6 @@ Tcl_ExternalToUtfDStringEx( soFar = dst + dstChunkWrote - Tcl_DStringValue(dstPtr); src += srcChunkRead; - srcLen -= srcChunkRead; /* * Keep looping in two case - @@ -1210,7 +1209,7 @@ Tcl_ExternalToUtfDStringEx( * - we had not passed in all the data and error indicated fragment * of a multibyte character * In both cases we have to grow buffer, move the input source pointer - * and loop. Otherwise, return the result we got. + * and loop. Otherwise, return the result we got. */ if ((result != TCL_CONVERT_NOSPACE) && !(result == TCL_CONVERT_MULTIBYTE && (flags & TCL_ENCODING_END))) { @@ -1219,6 +1218,7 @@ Tcl_ExternalToUtfDStringEx( } flags &= ~TCL_ENCODING_START; + srcLen -= srcChunkRead; if (Tcl_DStringLength(dstPtr) == 0) { Tcl_DStringSetLength(dstPtr, dstLen); @@ -1460,7 +1460,6 @@ Tcl_UtfToExternalDStringEx( /* Move past the part processed in this go around */ src += srcChunkRead; - srcLen -= srcChunkRead; /* * Keep looping in two case - @@ -1468,7 +1467,7 @@ Tcl_UtfToExternalDStringEx( * - we had not passed in all the data and error indicated fragment * of a multibyte character * In both cases we have to grow buffer, move the input source pointer - * and loop. Otherwise, return the result we got. + * and loop. Otherwise, return the result we got. */ if ((result != TCL_CONVERT_NOSPACE) && !(result == TCL_CONVERT_MULTIBYTE && (flags & TCL_ENCODING_END))) { @@ -1481,6 +1480,7 @@ Tcl_UtfToExternalDStringEx( } flags &= ~TCL_ENCODING_START; + srcLen -= srcChunkRead; if (Tcl_DStringLength(dstPtr) == 0) { Tcl_DStringSetLength(dstPtr, dstLen); -- cgit v0.12 From 8fdf20ec2b62f7da18e6acb82772c15d7ee2c596 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Sun, 26 Feb 2023 16:01:29 +0000 Subject: More dstring testcases, extracted from [46dda6fc29] --- tests/dstring.test | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/tests/dstring.test b/tests/dstring.test index 8a24ebe..6cf4bb8 100644 --- a/tests/dstring.test +++ b/tests/dstring.test @@ -211,6 +211,38 @@ test dstring-2.15 {appending list elements} -constraints testdstring -setup { } -cleanup { testdstring free } -result {x #} +test dstring-2.16 {appending list elements - bug [46dda6fc29] segfault} -constraints testdstring -setup { + testdstring free +} -body { + testdstring element "\\\n"; # Will setfault + testdstring get +} -cleanup { + testdstring free +} -result \\\\\\n +test dstring-2.17 {appending list elements - bug [46dda6fc29] segfault} -constraints testdstring -setup { + testdstring free +} -body { + testdstring element "\\\{"; # Will setfault + testdstring get +} -cleanup { + testdstring free +} -result [list [list \{]] +test dstring-2.18 {appending list elements - bug [46dda6fc29] segfault} -constraints testdstring -setup { + testdstring free +} -body { + testdstring element "\\\}"; # Will setfault + testdstring get +} -cleanup { + testdstring free +} -result [list [list \}]] +test dstring-2.19 {appending list elements - bug [46dda6fc29] segfault} -constraints testdstring -setup { + testdstring free +} -body { + testdstring element "\\\\"; # Will setfault + testdstring get +} -cleanup { + testdstring free +} -result [list [list \\]] test dstring-3.1 {nested sublists} -constraints testdstring -setup { testdstring free -- cgit v0.12 From cbda5cb9b212067d1d831ec476057502e3c70531 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Sun, 26 Feb 2023 16:41:06 +0000 Subject: Make Tcl_UtfToExternal()/Tcl_ExternalToUtf() report the error, if srcLen and dstLen are both > INT_MAX and therefore not all characters can be handled by this function. --- generic/tclEncoding.c | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index ce5626f..67e67e9 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -1229,7 +1229,7 @@ Tcl_ExternalToUtf( Tcl_Encoding encoding, /* The encoding for the source string, or NULL * for the default system encoding. */ const char *src, /* Source string in specified encoding. */ - Tcl_Size srcLen, /* Source string length in bytes, or < 0 for + Tcl_Size srcLen, /* Source string length in bytes, or TCL_INDEX_NONE for * encoding-specific string length. */ int flags, /* Conversion control flags. */ Tcl_EncodingState *statePtr,/* Place for conversion routine to store state @@ -1271,7 +1271,15 @@ Tcl_ExternalToUtf( srcLen = encodingPtr->lengthProc(src); } if (statePtr == NULL) { - flags |= TCL_ENCODING_START | TCL_ENCODING_END; + flags |= TCL_ENCODING_START; + if (srcLen > INT_MAX) { + srcLen = INT_MAX; + } else { + flags |= TCL_ENCODING_END; + } + if (dstLen > INT_MAX) { + dstLen = INT_MAX; + } statePtr = &state; } if (srcReadPtr == NULL) { @@ -1467,7 +1475,7 @@ Tcl_UtfToExternal( Tcl_Encoding encoding, /* The encoding for the converted string, or * NULL for the default system encoding. */ const char *src, /* Source string in UTF-8. */ - Tcl_Size srcLen, /* Source string length in bytes, or < 0 for + Tcl_Size srcLen, /* Source string length in bytes, or TCL_INDEX_NONE for * strlen(). */ int flags, /* Conversion control flags. */ Tcl_EncodingState *statePtr,/* Place for conversion routine to store state @@ -1506,7 +1514,15 @@ Tcl_UtfToExternal( srcLen = strlen(src); } if (statePtr == NULL) { - flags |= TCL_ENCODING_START | TCL_ENCODING_END; + flags |= TCL_ENCODING_START; + if (srcLen > INT_MAX) { + srcLen = INT_MAX; + } else { + flags |= TCL_ENCODING_END; + } + if (dstLen > INT_MAX) { + dstLen = INT_MAX; + } statePtr = &state; } if (srcReadPtr == NULL) { -- cgit v0.12 From baf9b5e9bb89e1e13583fb510f6cb134d39126ff Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Sun, 26 Feb 2023 16:54:02 +0000 Subject: Handle statePtr != NULL as well --- generic/tclEncoding.c | 34 ++++++++++++++++------------------ 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 67e67e9..e639d3a 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -1271,17 +1271,16 @@ Tcl_ExternalToUtf( srcLen = encodingPtr->lengthProc(src); } if (statePtr == NULL) { - flags |= TCL_ENCODING_START; - if (srcLen > INT_MAX) { - srcLen = INT_MAX; - } else { - flags |= TCL_ENCODING_END; - } - if (dstLen > INT_MAX) { - dstLen = INT_MAX; - } + flags |= TCL_ENCODING_START | TCL_ENCODING_END; statePtr = &state; } + if (srcLen > INT_MAX) { + srcLen = INT_MAX; + flags &= ~TCL_ENCODING_END; + } + if (dstLen > INT_MAX) { + dstLen = INT_MAX; + } if (srcReadPtr == NULL) { srcReadPtr = &srcRead; } @@ -1514,17 +1513,16 @@ Tcl_UtfToExternal( srcLen = strlen(src); } if (statePtr == NULL) { - flags |= TCL_ENCODING_START; - if (srcLen > INT_MAX) { - srcLen = INT_MAX; - } else { - flags |= TCL_ENCODING_END; - } - if (dstLen > INT_MAX) { - dstLen = INT_MAX; - } + flags |= TCL_ENCODING_START | TCL_ENCODING_END; statePtr = &state; } + if (srcLen > INT_MAX) { + srcLen = INT_MAX; + flags &= ~TCL_ENCODING_END; + } + if (dstLen > INT_MAX) { + dstLen = INT_MAX; + } if (srcReadPtr == NULL) { srcReadPtr = &srcRead; } -- cgit v0.12 From 152d7203ac1b3f7f560995985c15f7527f2ecdc9 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Sun, 26 Feb 2023 17:19:07 +0000 Subject: Handle Tcl_UtfToExternal error in tclZlib.c --- generic/tclZlib.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/generic/tclZlib.c b/generic/tclZlib.c index 5a6dbc4..ea18c16 100644 --- a/generic/tclZlib.c +++ b/generic/tclZlib.c @@ -444,9 +444,13 @@ GenerateHeader( goto error; } else if (value != NULL) { valueStr = Tcl_GetStringFromObj(value, &length); - Tcl_UtfToExternal(NULL, latin1enc, valueStr, length, 0, NULL, + if (Tcl_UtfToExternal(NULL, latin1enc, valueStr, length, 0, NULL, headerPtr->nativeCommentBuf, MAX_COMMENT_LEN-1, NULL, &len, - NULL); + NULL) != TCL_OK) { + result = TCL_ERROR; + Tcl_AppendResult(interp, "Cannot encode comment", NULL); + goto error; + } headerPtr->nativeCommentBuf[len] = '\0'; headerPtr->header.comment = (Bytef *) headerPtr->nativeCommentBuf; if (extraSizePtr != NULL) { @@ -465,8 +469,13 @@ GenerateHeader( goto error; } else if (value != NULL) { valueStr = Tcl_GetStringFromObj(value, &length); - Tcl_UtfToExternal(NULL, latin1enc, valueStr, length, 0, NULL, - headerPtr->nativeFilenameBuf, MAXPATHLEN-1, NULL, &len, NULL); + if (Tcl_UtfToExternal(NULL, latin1enc, valueStr, length, 0, NULL, + headerPtr->nativeCommentBuf, MAX_COMMENT_LEN-1, NULL, &len, + NULL) != TCL_OK) { + result = TCL_ERROR; + Tcl_AppendResult(interp, "Cannot encode filename", NULL); + goto error; + } headerPtr->nativeFilenameBuf[len] = '\0'; headerPtr->header.name = (Bytef *) headerPtr->nativeFilenameBuf; if (extraSizePtr != NULL) { -- cgit v0.12 From e7b8b9d2dd7951ecf0e3cbbcb618244fd7c45ebb Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Sun, 26 Feb 2023 20:12:43 +0000 Subject: Proposed fix for [f9eafc3886]: Error handling in zlib comment/filename. With testcases --- generic/tclZlib.c | 47 ++++++++++++++++++++++++++++++++++++++--------- tests/zlib.test | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 86 insertions(+), 9 deletions(-) diff --git a/generic/tclZlib.c b/generic/tclZlib.c index 63a25fa..cbff7b7 100644 --- a/generic/tclZlib.c +++ b/generic/tclZlib.c @@ -441,10 +441,21 @@ GenerateHeader( if (GetValue(interp, dictObj, "comment", &value) != TCL_OK) { goto error; } else if (value != NULL) { + Tcl_EncodingState state; valueStr = Tcl_GetStringFromObj(value, &len); - Tcl_UtfToExternal(NULL, latin1enc, valueStr, len, 0, NULL, + result = Tcl_UtfToExternal(NULL, latin1enc, valueStr, len, + TCL_ENCODING_START|TCL_ENCODING_END|TCL_ENCODING_STOPONERROR, &state, headerPtr->nativeCommentBuf, MAX_COMMENT_LEN-1, NULL, &len, NULL); + if (result != TCL_OK) { + if (result == TCL_CONVERT_UNKNOWN) { + Tcl_AppendResult(interp, "Comment contains characters > 0xFF", NULL); + } else { + Tcl_AppendResult(interp, "Comment too large for zip", NULL); + } + result = TCL_ERROR; + goto error; + } headerPtr->nativeCommentBuf[len] = '\0'; headerPtr->header.comment = (Bytef *) headerPtr->nativeCommentBuf; if (extraSizePtr != NULL) { @@ -462,9 +473,21 @@ GenerateHeader( if (GetValue(interp, dictObj, "filename", &value) != TCL_OK) { goto error; } else if (value != NULL) { + Tcl_EncodingState state; valueStr = Tcl_GetStringFromObj(value, &len); - Tcl_UtfToExternal(NULL, latin1enc, valueStr, len, 0, NULL, - headerPtr->nativeFilenameBuf, MAXPATHLEN-1, NULL, &len, NULL); + result = Tcl_UtfToExternal(NULL, latin1enc, valueStr, len, + TCL_ENCODING_START|TCL_ENCODING_END|TCL_ENCODING_STOPONERROR, &state, + headerPtr->nativeFilenameBuf, MAXPATHLEN-1, NULL, &len, + NULL); + if (result != TCL_OK) { + if (result == TCL_CONVERT_UNKNOWN) { + Tcl_AppendResult(interp, "Filename contains characters > 0xFF", NULL); + } else { + Tcl_AppendResult(interp, "Filename too large for zip", NULL); + } + result = TCL_ERROR; + goto error; + } headerPtr->nativeFilenameBuf[len] = '\0'; headerPtr->header.name = (Bytef *) headerPtr->nativeFilenameBuf; if (extraSizePtr != NULL) { @@ -1189,7 +1212,8 @@ Tcl_ZlibStreamPut( { ZlibStreamHandle *zshPtr = (ZlibStreamHandle *) zshandle; char *dataTmp = NULL; - int e, size, outSize, toStore; + int e; + int size, outSize, toStore; if (zshPtr->streamEnd) { if (zshPtr->interp) { @@ -1312,7 +1336,8 @@ Tcl_ZlibStreamGet( * may get less! */ { ZlibStreamHandle *zshPtr = (ZlibStreamHandle *) zshandle; - int e, i, listLen, itemLen, dataPos = 0; + int e; + int i, listLen, itemLen, dataPos = 0; Tcl_Obj *itemObj; unsigned char *dataPtr, *itemPtr; int existing; @@ -1561,7 +1586,8 @@ Tcl_ZlibDeflate( int level, Tcl_Obj *gzipHeaderDictObj) { - int wbits = 0, inLen = 0, e = 0, extraSize = 0; + int wbits = 0, e = 0, extraSize = 0; + int inLen = 0; Byte *inData = NULL; z_stream stream; GzipHeader header; @@ -1711,7 +1737,8 @@ Tcl_ZlibInflate( int bufferSize, Tcl_Obj *gzipHeaderDictObj) { - int wbits = 0, inLen = 0, e = 0, newBufferSize; + int wbits = 0, e = 0; + int inLen = 0, newBufferSize; Byte *inData = NULL, *outData = NULL, *newOutData = NULL; z_stream stream; gz_header header, *headerPtr = NULL; @@ -2365,7 +2392,8 @@ ZlibPushSubcmd( const char *const *pushOptions = pushDecompressOptions; enum pushOptions {poDictionary, poHeader, poLevel, poLimit}; Tcl_Obj *headerObj = NULL, *compDictObj = NULL; - int limit = DEFAULT_BUFFER_SIZE, dummy; + int limit = DEFAULT_BUFFER_SIZE; + int dummy; if (objc < 4) { Tcl_WrongNumArgs(interp, 2, objv, "mode channel ?options...?"); @@ -2897,7 +2925,8 @@ ZlibTransformClose( Tcl_Interp *interp) { ZlibChannelData *cd = (ZlibChannelData *)instanceData; - int e, written, result = TCL_OK; + int e, result = TCL_OK; + int written; /* * Delete the support timer. diff --git a/tests/zlib.test b/tests/zlib.test index 7ddf1d7..c3e344c 100644 --- a/tests/zlib.test +++ b/tests/zlib.test @@ -486,6 +486,54 @@ test zlib-8.18 {Bug dd260aaf: fconfigure} -setup { catch {close $inSide} catch {close $outSide} } -result {{one two} {one two}} +test zlib-8.19 {zlib transformation, bug XXXXXXXXXX} -constraints zlib -setup { + set file [makeFile {} test.gz] +} -body { + set f [zlib push gzip [open $file w] -header [list comment [string repeat A 500]]] + puts $f "ok" + close $f + set f [zlib push gunzip [open $file]] + list [gets $f] [dict get [chan configure $f -header] comment] +} -cleanup { + close $f + removeFile $file +} -returnCodes 1 -result {Comment too large for zip} +test zlib-8.20 {zlib transformation, bug XXXXXXXXXX} -constraints zlib -setup { + set file [makeFile {} test.gz] +} -body { + set f [zlib push gzip [open $file w] -header [list filename [string repeat A 5000]]] + puts $f "ok" + close $f + set f [zlib push gunzip [open $file]] + list [gets $f] [dict get [chan configure $f -header] filename] +} -cleanup { + close $f + removeFile $file +} -returnCodes 1 -result {Filename too large for zip} +test zlib-8.21 {zlib transformation, bug XXXXXXXXXX} -constraints zlib -setup { + set file [makeFile {} test.gz] +} -body { + set f [zlib push gzip [open $file w] -header [list comment \u100]] + puts $f "ok" + close $f + set f [zlib push gunzip [open $file]] + list [gets $f] [dict get [chan configure $f -header] comment] +} -cleanup { + close $f + removeFile $file +} -returnCodes 1 -result {Comment contains characters > 0xFF} +test zlib-8.22 {zlib transformation, bug XXXXXXXXXX} -constraints zlib -setup { + set file [makeFile {} test.gz] +} -body { + set f [zlib push gzip [open $file w] -header [list filename \u100]] + puts $f "ok" + close $f + set f [zlib push gunzip [open $file]] + list [gets $f] [dict get [chan configure $f -header] comment] +} -cleanup { + close $f + removeFile $file +} -returnCodes 1 -result {Filename contains characters > 0xFF} test zlib-9.1 "check fcopy with push" -constraints zlib -setup { set sfile [makeFile {} testsrc.gz] -- cgit v0.12 From 6b172a213198a8d51d9b0b7783e0df3adc71bfe6 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Sun, 26 Feb 2023 21:37:15 +0000 Subject: fill in bug ticket-nr --- tests/zlib.test | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/zlib.test b/tests/zlib.test index c3e344c..61e14bb 100644 --- a/tests/zlib.test +++ b/tests/zlib.test @@ -486,7 +486,7 @@ test zlib-8.18 {Bug dd260aaf: fconfigure} -setup { catch {close $inSide} catch {close $outSide} } -result {{one two} {one two}} -test zlib-8.19 {zlib transformation, bug XXXXXXXXXX} -constraints zlib -setup { +test zlib-8.19 {zlib transformation, bug f9eafc3886} -constraints zlib -setup { set file [makeFile {} test.gz] } -body { set f [zlib push gzip [open $file w] -header [list comment [string repeat A 500]]] @@ -498,7 +498,7 @@ test zlib-8.19 {zlib transformation, bug XXXXXXXXXX} -constraints zlib -setup { close $f removeFile $file } -returnCodes 1 -result {Comment too large for zip} -test zlib-8.20 {zlib transformation, bug XXXXXXXXXX} -constraints zlib -setup { +test zlib-8.20 {zlib transformation, bug f9eafc3886} -constraints zlib -setup { set file [makeFile {} test.gz] } -body { set f [zlib push gzip [open $file w] -header [list filename [string repeat A 5000]]] @@ -510,7 +510,7 @@ test zlib-8.20 {zlib transformation, bug XXXXXXXXXX} -constraints zlib -setup { close $f removeFile $file } -returnCodes 1 -result {Filename too large for zip} -test zlib-8.21 {zlib transformation, bug XXXXXXXXXX} -constraints zlib -setup { +test zlib-8.21 {zlib transformation, bug f9eafc3886} -constraints zlib -setup { set file [makeFile {} test.gz] } -body { set f [zlib push gzip [open $file w] -header [list comment \u100]] @@ -522,7 +522,7 @@ test zlib-8.21 {zlib transformation, bug XXXXXXXXXX} -constraints zlib -setup { close $f removeFile $file } -returnCodes 1 -result {Comment contains characters > 0xFF} -test zlib-8.22 {zlib transformation, bug XXXXXXXXXX} -constraints zlib -setup { +test zlib-8.22 {zlib transformation, bug f9eafc3886} -constraints zlib -setup { set file [makeFile {} test.gz] } -body { set f [zlib push gzip [open $file w] -header [list filename \u100]] -- cgit v0.12 From ed360ca42f9908e5d8da5e8f4742b07d0b148b17 Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Mon, 27 Feb 2023 03:13:38 +0000 Subject: Add perf constraint to large io tests to prevent memory faults on systems with limited memory --- tests/io.test | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/io.test b/tests/io.test index 20b240f..c245add 100644 --- a/tests/io.test +++ b/tests/io.test @@ -196,7 +196,7 @@ test io-1.9 {Tcl_WriteChars: WriteChars} { } {19 19 19 19 19} test io-1.10 {WriteChars: large file (> INT_MAX). Bug 3d01d51bc4} -constraints { - pointerIs64bit + pointerIs64bit perf } -setup { set tmpfile [file join [temporaryDirectory] io-1.10.tmp] } -cleanup { @@ -209,7 +209,7 @@ test io-1.10 {WriteChars: large file (> INT_MAX). Bug 3d01d51bc4} -constraints { file size $tmpfile } -result 2147483648 test io-1.11 {WriteChars: large file (> UINT_MAX). Bug 3d01d51bc4} -constraints { - pointerIs64bit + pointerIs64bit perf } -setup { set tmpfile [file join [temporaryDirectory] io-1.11.tmp] } -cleanup { @@ -222,7 +222,7 @@ test io-1.11 {WriteChars: large file (> UINT_MAX). Bug 3d01d51bc4} -constraints file size $tmpfile } -result 4294967296 test io-1.12 {WriteChars: large file (== UINT_MAX). Bug 90ff9b7f73} -constraints { - pointerIs64bit + pointerIs64bit perf } -setup { set tmpfile [file join [temporaryDirectory] io-1.12.tmp] } -cleanup { @@ -279,7 +279,7 @@ test io-2.4 {WriteBytes: reset sawLF after each buffer} { } [list "abcdefg\nhijklmno" "abcdefg\nhijklmnopqrstuvwxyz"] test io-2.5 {WriteBytes: large file (> INT_MAX). Bug 3d01d51bc4} -constraints { - pointerIs64bit + pointerIs64bit perf } -setup { set tmpfile [file join [temporaryDirectory] io-2.5.tmp] } -cleanup { @@ -292,7 +292,7 @@ test io-2.5 {WriteBytes: large file (> INT_MAX). Bug 3d01d51bc4} -constraints { file size $tmpfile } -result 2147483648 test io-2.6 {WriteBytes: large file (> UINT_MAX). Bug 3d01d51bc4} -constraints { - pointerIs64bit + pointerIs64bit perf } -setup { set tmpfile [file join [temporaryDirectory] io-2.6.tmp] } -cleanup { @@ -305,7 +305,7 @@ test io-2.6 {WriteBytes: large file (> UINT_MAX). Bug 3d01d51bc4} -constraints { file size $tmpfile } -result 4294967296 test io-2.7 {WriteBytes: large file (== UINT_MAX). Bug 90ff9b7f73} -constraints { - pointerIs64bit + pointerIs64bit perf } -setup { set tmpfile [file join [temporaryDirectory] io-2.7.tmp] } -cleanup { -- cgit v0.12 From e2d89615d52e47ed3b683498567e058e809aea39 Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Mon, 27 Feb 2023 04:15:07 +0000 Subject: Tests for encoding strings > 4GB (under perf constraint) --- tests/encoding.test | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/tests/encoding.test b/tests/encoding.test index e0e1598..8b14353 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -1046,6 +1046,32 @@ test encoding-29.0 {get encoding nul terminator lengths} -constraints { [testencoding nullength ksc5601] } -result {1 2 4 2 2} +test encoding-30.0 {encoding convertto large strings UINT_MAX} -constraints { + perf +} -body { + # Test to ensure not misinterpreted as -1 + list [string length [set s [string repeat A 0xFFFFFFFF]]] [string equal $s [encoding convertto ascii $s]] +} -result {4294967295 1} + +test encoding-30.1 {encoding convertto large strings > 4GB} -constraints { + perf +} -body { + list [string length [set s [string repeat A 0x100000000]]] [string equal $s [encoding convertto ascii $s]] +} -result {4294967296 1} + +test encoding-30.2 {encoding convertfrom large strings UINT_MAX} -constraints { + perf +} -body { + # Test to ensure not misinterpreted as -1 + list [string length [set s [string repeat A 0xFFFFFFFF]]] [string equal $s [encoding convertfrom ascii $s]] +} -result {4294967295 1} + +test encoding-30.3 {encoding convertfrom large strings > 4GB} -constraints { + perf +} -body { + list [string length [set s [string repeat A 0x100000000]]] [string equal $s [encoding convertfrom ascii $s]] +} -result {4294967296 1} + # cleanup namespace delete ::tcl::test::encoding -- cgit v0.12 From 85bf0db1e84ab483fce7962c151bedeb3f5e0993 Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Mon, 27 Feb 2023 12:31:49 +0000 Subject: Fix crash. int->size_t needs +1 in comparisons. --- generic/tclEncoding.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index a6ecc26..d0756c7 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -1473,7 +1473,7 @@ Tcl_UtfToExternalDStringEx( !(result == TCL_CONVERT_MULTIBYTE && (flags & TCL_ENCODING_END))) { size_t i = soFar + encodingPtr->nullSize - 1; /* Loop as DStringSetLength only stores one nul byte at a time */ - while (i >= soFar) { + while (i+1 >= soFar+1) { Tcl_DStringSetLength(dstPtr, i--); } return (result == TCL_OK) ? TCL_INDEX_NONE : (Tcl_Size)(src - srcStart); -- cgit v0.12 From a1fe72fa4a3bf6c99720ce309d0611a5d941ea93 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Mon, 27 Feb 2023 20:50:33 +0000 Subject: Fix testcases --- tests/zlib.test | 24 ++++-------------------- 1 file changed, 4 insertions(+), 20 deletions(-) diff --git a/tests/zlib.test b/tests/zlib.test index 61e14bb..5312d2b 100644 --- a/tests/zlib.test +++ b/tests/zlib.test @@ -490,48 +490,32 @@ test zlib-8.19 {zlib transformation, bug f9eafc3886} -constraints zlib -setup { set file [makeFile {} test.gz] } -body { set f [zlib push gzip [open $file w] -header [list comment [string repeat A 500]]] - puts $f "ok" - close $f - set f [zlib push gunzip [open $file]] - list [gets $f] [dict get [chan configure $f -header] comment] } -cleanup { - close $f + catch {close $f} removeFile $file } -returnCodes 1 -result {Comment too large for zip} test zlib-8.20 {zlib transformation, bug f9eafc3886} -constraints zlib -setup { set file [makeFile {} test.gz] } -body { set f [zlib push gzip [open $file w] -header [list filename [string repeat A 5000]]] - puts $f "ok" - close $f - set f [zlib push gunzip [open $file]] - list [gets $f] [dict get [chan configure $f -header] filename] } -cleanup { - close $f + catch {close $f} removeFile $file } -returnCodes 1 -result {Filename too large for zip} test zlib-8.21 {zlib transformation, bug f9eafc3886} -constraints zlib -setup { set file [makeFile {} test.gz] } -body { set f [zlib push gzip [open $file w] -header [list comment \u100]] - puts $f "ok" - close $f - set f [zlib push gunzip [open $file]] - list [gets $f] [dict get [chan configure $f -header] comment] } -cleanup { - close $f + catch {close $f} removeFile $file } -returnCodes 1 -result {Comment contains characters > 0xFF} test zlib-8.22 {zlib transformation, bug f9eafc3886} -constraints zlib -setup { set file [makeFile {} test.gz] } -body { set f [zlib push gzip [open $file w] -header [list filename \u100]] - puts $f "ok" - close $f - set f [zlib push gunzip [open $file]] - list [gets $f] [dict get [chan configure $f -header] comment] } -cleanup { - close $f + catch {close $f} removeFile $file } -returnCodes 1 -result {Filename contains characters > 0xFF} -- cgit v0.12 From a947270ff77379afdeda26a33f5f444337b820bc Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Tue, 28 Feb 2023 07:45:27 +0000 Subject: In case of combining TIP #494 (TCL_8_COMPAT) and #628 (building for Tcl 8.7 with 9.0 headers), ignore TCL_8_COMPAT macro. More Tcl_Size usage. --- generic/tcl.h | 14 +++++++------- generic/tclDecls.h | 2 +- generic/tclTest.c | 22 +++++++++++----------- 3 files changed, 19 insertions(+), 19 deletions(-) diff --git a/generic/tcl.h b/generic/tcl.h index fa4da26..1a1452e 100644 --- a/generic/tcl.h +++ b/generic/tcl.h @@ -311,10 +311,10 @@ typedef unsigned TCL_WIDE_INT_TYPE Tcl_WideUInt; #define Tcl_WideAsDouble(val) ((double)((Tcl_WideInt)(val))) #define Tcl_DoubleAsWide(val) ((Tcl_WideInt)((double)(val))) -#if TCL_MAJOR_VERSION > 8 -typedef size_t Tcl_Size; -#else +#if TCL_MAJOR_VERSION < 9 typedef int Tcl_Size; +#else +typedef size_t Tcl_Size; #endif #ifdef _WIN32 @@ -452,17 +452,17 @@ typedef void (Tcl_ThreadCreateProc) (void *clientData); #if TCL_MAJOR_VERSION > 8 typedef struct Tcl_RegExpIndices { - size_t start; /* Character offset of first character in + Tcl_Size start; /* Character offset of first character in * match. */ - size_t end; /* Character offset of first character after + Tcl_Size end; /* Character offset of first character after * the match. */ } Tcl_RegExpIndices; typedef struct Tcl_RegExpInfo { - size_t nsubs; /* Number of subexpressions in the compiled + Tcl_Size nsubs; /* Number of subexpressions in the compiled * expression. */ Tcl_RegExpIndices *matches; /* Array of nsubs match offset pairs. */ - size_t extendStart; /* The offset at which a subsequent match + Tcl_Size extendStart; /* The offset at which a subsequent match * might begin. */ } Tcl_RegExpInfo; #else diff --git a/generic/tclDecls.h b/generic/tclDecls.h index f219500..ed2eb74 100644 --- a/generic/tclDecls.h +++ b/generic/tclDecls.h @@ -4229,7 +4229,7 @@ extern const TclStubs *tclStubsPtr; #define Tcl_GlobalEvalObj(interp, objPtr) \ Tcl_EvalObjEx(interp, objPtr, TCL_EVAL_GLOBAL) -#if defined(TCL_8_COMPAT) && !defined(BUILD_tcl) +#if defined(TCL_8_COMPAT) && !defined(BUILD_tcl) && TCL_MAJOR_VERSION > 8 # ifdef USE_TCL_STUBS # undef Tcl_Gets # undef Tcl_GetsObj diff --git a/generic/tclTest.c b/generic/tclTest.c index 652c5aa..b6c7f77 100644 --- a/generic/tclTest.c +++ b/generic/tclTest.c @@ -4105,7 +4105,7 @@ TestregexpObjCmd( Tcl_Obj *const objv[]) /* Argument objects. */ { int i, indices, stringLength, match, about; - size_t ii; + Tcl_Size ii; int hasxflags, cflags, eflags; Tcl_RegExp regExpr; const char *string; @@ -4217,7 +4217,7 @@ TestregexpObjCmd( if (objc > 2 && (cflags®_EXPECT) && indices) { const char *varName; const char *value; - size_t start, end; + Tcl_Size start, end; char resinfo[TCL_INTEGER_SPACE * 2]; varName = Tcl_GetString(objv[2]); @@ -4257,11 +4257,11 @@ TestregexpObjCmd( Tcl_RegExpGetInfo(regExpr, &info); for (i = 0; i < objc; i++) { - size_t start, end; + Tcl_Size start, end; Tcl_Obj *newPtr, *varPtr, *valuePtr; varPtr = objv[i]; - ii = ((cflags®_EXPECT) && i == objc-1) ? TCL_INDEX_NONE : (size_t)i; + ii = ((cflags®_EXPECT) && i == objc-1) ? TCL_INDEX_NONE : (Tcl_Size)i; if (indices) { Tcl_Obj *objs[2]; @@ -6476,10 +6476,10 @@ static int TestWrongNumArgsObjCmd( TCL_UNUSED(void *), Tcl_Interp *interp, /* Current interpreter. */ - size_t objc, /* Number of arguments. */ + Tcl_Size objc, /* Number of arguments. */ Tcl_Obj *const objv[]) /* Argument objects. */ { - size_t i, length; + Tcl_Size i, length; const char *msg; if (objc + 1 < 4) { @@ -7187,7 +7187,7 @@ TestUtfPrevCmd( int objc, Tcl_Obj *const objv[]) { - size_t numBytes, offset; + Tcl_Size numBytes, offset; char *bytes; const char *result; @@ -7228,7 +7228,7 @@ TestNumUtfCharsCmd( Tcl_Obj *const objv[]) { if (objc > 1) { - size_t numBytes, len, limit = TCL_INDEX_NONE; + Tcl_Size numBytes, len, limit = TCL_INDEX_NONE; const char *bytes = Tcl_GetStringFromObj(objv[1], &numBytes); if (objc > 2) { @@ -7296,7 +7296,7 @@ TestGetIntForIndexCmd( int objc, Tcl_Obj *const objv[]) { - size_t result; + Tcl_Size result; Tcl_WideInt endvalue; if (objc != 3) { @@ -7415,7 +7415,7 @@ TestHashSystemHashCmd( Tcl_SetHashValue(hPtr, INT2PTR(i+42)); } - if (hash.numEntries != (size_t)limit) { + if (hash.numEntries != (Tcl_Size)limit) { Tcl_AppendResult(interp, "unexpected maximal size", NULL); Tcl_DeleteHashTable(&hash); return TCL_ERROR; @@ -8175,7 +8175,7 @@ static int InterpCompiledVarResolver( TCL_UNUSED(Tcl_Interp *), const char *name, - TCL_UNUSED(size_t) /*length*/, + TCL_UNUSED(Tcl_Size) /*length*/, TCL_UNUSED(Tcl_Namespace *), Tcl_ResolvedVarInfo **rPtr) { -- cgit v0.12 From e743d3e48700a8b562d4a7e3893c856532ca107c Mon Sep 17 00:00:00 2001 From: pooryorick Date: Tue, 28 Feb 2023 09:57:22 +0000 Subject: Fix formatting issue in Tcl.n --- doc/Tcl.n | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doc/Tcl.n b/doc/Tcl.n index 8e0b342..0f784af 100644 --- a/doc/Tcl.n +++ b/doc/Tcl.n @@ -156,6 +156,8 @@ special processing. The following table lists the backslash sequences that are handled specially, along with the value that replaces each sequence. .RS +.RS +.RS .TP 7 \e\fBa\fR Audible alert (bell) (Unicode U+000007). @@ -222,6 +224,7 @@ inserted, in the range U+000000\(enU+10FFFF. The parser will stop just before this range overflows, or when the maximum of eight digits is reached. The upper bits of the Unicode character will be 0. .RE +.RE .PP Backslash substitution is not performed on words enclosed in braces, except for backslash-newline as described above. -- cgit v0.12 From 8b417f0e1ec2ff11fe856ea3d521356489c8dae0 Mon Sep 17 00:00:00 2001 From: pooryorick Date: Tue, 28 Feb 2023 09:59:29 +0000 Subject: Fix formatting issue in Tcl.n --- doc/Tcl.n | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doc/Tcl.n b/doc/Tcl.n index 8e0b342..0f784af 100644 --- a/doc/Tcl.n +++ b/doc/Tcl.n @@ -156,6 +156,8 @@ special processing. The following table lists the backslash sequences that are handled specially, along with the value that replaces each sequence. .RS +.RS +.RS .TP 7 \e\fBa\fR Audible alert (bell) (Unicode U+000007). @@ -222,6 +224,7 @@ inserted, in the range U+000000\(enU+10FFFF. The parser will stop just before this range overflows, or when the maximum of eight digits is reached. The upper bits of the Unicode character will be 0. .RE +.RE .PP Backslash substitution is not performed on words enclosed in braces, except for backslash-newline as described above. -- cgit v0.12 From 7d6cf09e029257c1c0656f2cd9253a4436e6a27c Mon Sep 17 00:00:00 2001 From: pooryorick Date: Tue, 28 Feb 2023 10:04:49 +0000 Subject: Make the descriptions in doc/Tcl.n more concise and intuitive. --- doc/Tcl.n | 315 ++++++++++++++++++++++++-------------------------------------- 1 file changed, 121 insertions(+), 194 deletions(-) diff --git a/doc/Tcl.n b/doc/Tcl.n index 0f784af..d13f3ea 100644 --- a/doc/Tcl.n +++ b/doc/Tcl.n @@ -1,6 +1,7 @@ '\" '\" Copyright (c) 1993 The Regents of the University of California. '\" Copyright (c) 1994-1996 Sun Microsystems, Inc. +'\" Copyright (c) 2023 Nathan Coulter '\" '\" See the file "license.terms" for information on usage and redistribution '\" of this file, and for a DISCLAIMER OF ALL WARRANTIES. @@ -16,178 +17,152 @@ Summary of Tcl language syntax. .SH DESCRIPTION .PP The following rules define the syntax and semantics of the Tcl language: -.IP "[1] \fBCommands.\fR" -A Tcl script is a string containing one or more commands. -Semi-colons and newlines are command separators unless quoted as -described below. -Close brackets are command terminators during command substitution -(see below) unless quoted. -.IP "[2] \fBEvaluation.\fR" -A command is evaluated in two steps. -First, the Tcl interpreter breaks the command into \fIwords\fR -and performs substitutions as described below. -These substitutions are performed in the same way for all -commands. -Secondly, the first word is used to locate a routine to -carry out the command, and the remaining words of the command are -passed to that routine. -The routine is free to interpret each of its words -in any way it likes, such as an integer, variable name, list, -or Tcl script. -Different commands interpret their words differently. -.IP "[3] \fBWords.\fR" -Words of a command are separated by white space (except for -newlines, which are command separators). -.IP "[4] \fBDouble quotes.\fR" -If the first character of a word is double-quote +. +.IP "[1] \fBScript.\fR" +A script is composed of zero or more commands delimited by semi-colons or +newlines. +.IP "[2] \fBCommand.\fR" +A command is composed of zero or more words delimited by whitespace. The +replacement for a substitution is included verbatim in the word. For example, a +space in the replacement is included in the word rather than becoming a +delimiter, and \fI\\\\\fR becomes a single backslash in the word. Each word is +processed from left to right and each substitution is performed as soon as it +is complete. +For example, the command +.RS +.PP +.CS +set y [set x 0][incr x][incr x] +.CE +.PP +is composed of three words, and sets the value of \fIy\fR to \fI012\fR. +.PP +If hash +.PQ # +is the first character of what would otherwise be the first word of a command, +all characters up to the next newline are ignored. +.RE +. +.IP "[3] \fBBraced word.\fR" +If a word is enclosed in braces +.PQ { +and +.PQ } "" +, the braces are removed and the enclosed characters become the word. No +substitutions are performed. Nested pairs of braces may occur within the word. +A brace preceded by an odd number of backslashes is not considered part of a +pair, and neither brace nor the backslashes are removed from the word. +. +.IP "[4] \fBQuoted word.\fR" +If a word is enclosed in double quotes .PQ \N'34' -then the word is terminated by the next double-quote character. -If semi-colons, close brackets, or white space characters -(including newlines) appear between the quotes then they are treated -as ordinary characters and included in the word. -Command substitution, variable substitution, and backslash substitution -are performed on the characters between the quotes as described below. -The double-quotes are not retained as part of the word. -.IP "[5] \fBArgument expansion.\fR" -If a word starts with the string -.QW {*} -followed by a non-whitespace character, then the leading +, the double quotes are removed and the enclosed characters become the word. +Substitutions are performed. +. +.IP "[5] \fBList.\fR" +A list has the form of a single command. Newline is whitespace, and semicolon +has no special interpretation. There is no script evaluation so there is no +argument expansion, variable substitution, or command substitution: Dollar-sign +and open bracket have no special interpretation, and what would be argument +expansion in a script is invalid in a list. +. +.IP "[6] \fBArgument expansion.\fR" +If .QW {*} -is removed and the rest of the word is parsed and substituted as any other -word. After substitution, the word is parsed as a list (without command or -variable substitutions; backslash substitutions are performed as is normal for -a list and individual internal words may be surrounded by either braces or -double-quote characters), and its words are added to the command being -substituted. For instance, -.QW "cmd a {*}{b [c]} d {*}{$e f {g h}}" +prefixes a word, it is removed. After any remaining enclosing braces or quotes +are processed and applicable substitutions performed, the word, which must +be a list, is removed from the command, and in its place each word in the +list becomes an additional word in the command. For example, +.CS +cmd a {*}{b [c]} d {*}{$e f {g h}} +.CE is equivalent to -.QW "cmd a b {[c]} d {$e} f {g h}" . -.IP "[6] \fBBraces.\fR" -If the first character of a word is an open brace -.PQ { -and rule [5] does not apply, then -the word is terminated by the matching close brace -.PQ } "" . -Braces nest within the word: for each additional open -brace there must be an additional close brace (however, -if an open brace or close brace within the word is -quoted with a backslash then it is not counted in locating the -matching close brace). -No substitutions are performed on the characters between the -braces except for backslash-newline substitutions described -below, nor do semi-colons, newlines, close brackets, -or white space receive any special interpretation. -The word will consist of exactly the characters between the -outer braces, not including the braces themselves. -.IP "[7] \fBCommand substitution.\fR" -If a word contains an open bracket +.CS +cmd a b {[c]} d {$e} f {g h} . +.CE +. +.IP "[7] \fBEvaluation.\fR" +To evaluate a script, an interpreter evaluates each successive command. The +first word identifies a procedure, and the remaining words are passed to that +procedure for further evaluation. The procedure interprets each argument in +its own way, e.g. as an integer, variable name, list, mathematical expression, +script, or in some other arbitrary way. The result of the last command is the +result of the script. +. +.IP "[8] \fBCommand substitution.\fR" +Each pair of brackets .PQ [ -then Tcl performs \fIcommand substitution\fR. -To do this it invokes the Tcl interpreter recursively to process -the characters following the open bracket as a Tcl script. -The script may contain any number of commands and must be terminated -by a close bracket -.PQ ] "" . -The result of the script (i.e. the result of its last command) is -substituted into the word in place of the brackets and all of the -characters between them. -There may be any number of command substitutions in a single word. -Command substitution is not performed on words enclosed in braces. -.IP "[8] \fBVariable substitution.\fR" -If a word contains a dollar-sign +and +.PQ ] "" +encloses a script and is replaced by the result of that script. +.IP "[9] \fBVariable substitution.\fR" +Each of the following forms begins with dollar sign .PQ $ -followed by one of the forms -described below, then Tcl performs \fIvariable -substitution\fR: the dollar-sign and the following characters are -replaced in the word by the value of a variable. -Variable substitution may take any of the following forms: +and is replaced by the value of the identified variable. \fIname\fR names the +variable and is composed of ASCII letters (\fBA\fR\(en\fBZ\fR and +\fBa\fR\(en\fBz\fR), digits (\fB0\fR\(en\fB9\fR), underscores, or namespace +delimiters (two or more colons). \fIindex\fR is the name of an individual +variable within an array variable, and may be empty. .RS .TP 15 \fB$\fIname\fR . -\fIName\fR is the name of a scalar variable; the name is a sequence -of one or more characters that are a letter, digit, underscore, -or namespace separators (two or more colons). -Letters and digits are \fIonly\fR the standard ASCII ones (\fB0\fR\(en\fB9\fR, -\fBA\fR\(en\fBZ\fR and \fBa\fR\(en\fBz\fR). +\fIname\fR may not be empty. + .TP 15 \fB$\fIname\fB(\fIindex\fB)\fR . -\fIName\fR gives the name of an array variable and \fIindex\fR gives -the name of an element within that array. -\fIName\fR must contain only letters, digits, underscores, and -namespace separators, and may be an empty string. -Letters and digits are \fIonly\fR the standard ASCII ones (\fB0\fR\(en\fB9\fR, -\fBA\fR\(en\fBZ\fR and \fBa\fR\(en\fBz\fR). -Command substitutions, variable substitutions, and backslash -substitutions are performed on the characters of \fIindex\fR. +\fIname\fR may be empty. Substitutions are performed on \fIindex\fR. .TP 15 \fB${\fIname\fB}\fR +\fIname\fR may be empty. +.TP 15 +\fB${\fIname(index)\fB}\fR . -\fIName\fR is the name of a scalar variable or array element. It may contain -any characters whatsoever except for close braces. It indicates an array -element if \fIname\fR is in the form -.QW \fIarrayName\fB(\fIindex\fB)\fR -where \fIarrayName\fR does not contain any open parenthesis characters, -.QW \fB(\fR , -or close brace characters, -.QW \fB}\fR , -and \fIindex\fR can be any sequence of characters except for close brace -characters. No further -substitutions are performed during the parsing of \fIname\fR. -.PP -There may be any number of variable substitutions in a single word. -Variable substitution is not performed on words enclosed in braces. -.PP -Note that variables may contain character sequences other than those listed -above, but in that case other mechanisms must be used to access them (e.g., -via the \fBset\fR command's single-argument form). +\fIname\fR may be empty. No substitutions are performed. .RE -.IP "[9] \fBBackslash substitution.\fR" -If a backslash +Variables that are not accessible through one of the forms above may be +accessed through other mechanisms, e.g. the \fBset\fR command. +.IP "[10] \fBBackslash substitution.\fR" +Each backslash .PQ \e -appears within a word then \fIbackslash substitution\fR occurs. -In all cases but those described below the backslash is dropped and -the following character is treated as an ordinary -character and included in the word. -This allows characters such as double quotes, close brackets, -and dollar signs to be included in words without triggering -special processing. -The following table lists the backslash sequences that are -handled specially, along with the value that replaces each sequence. +that is not part of one of the forms listed below is removed, and the next +character is included in the word verbatim, which allows the inclusion of +characters that would normally be interpreted, namely whitespace, braces, +brackets, double quote, dollar sign, and backslash. The following sequences +are replaced as described: .RS .RS .RS .TP 7 \e\fBa\fR -Audible alert (bell) (Unicode U+000007). +Audible alert (bell) (U+7). .TP 7 \e\fBb\fR -Backspace (Unicode U+000008). +Backspace (U+8). .TP 7 \e\fBf\fR -Form feed (Unicode U+00000C). +Form feed (U+C). .TP 7 \e\fBn\fR -Newline (Unicode U+00000A). +Newline (U+A). .TP 7 \e\fBr\fR -Carriage-return (Unicode U+00000D). +Carriage-return (U+D). .TP 7 \e\fBt\fR -Tab (Unicode U+000009). +Tab (U+9). .TP 7 \e\fBv\fR -Vertical tab (Unicode U+00000B). +Vertical tab (U+B). .TP 7 \e\fB\fIwhiteSpace\fR . -A single space character replaces the backslash, newline, and all spaces -and tabs after the newline. This backslash sequence is unique in that it -is replaced in a separate pre-pass before the command is actually parsed. -This means that it will be replaced even when it occurs between braces, -and the resulting space will be treated as a word separator if it is not -in braces or quotes. +Newline preceded by an odd number of backslashes, along with the consecutive +spaces and tabs that immediately follow it, is replaced by a single space. +Because this happens before the command is split into words, it occurs even +within braced words, and if the resulting space may subsequently be treated as +a word delimiter. .TP 7 \e\e Backslash @@ -195,78 +170,30 @@ Backslash .TP 7 \e\fIooo\fR . -The digits \fIooo\fR (one, two, or three of them) give a eight-bit octal -value for the Unicode character that will be inserted, in the range -\fI000\fR\(en\fI377\fR (i.e., the range U+000000\(enU+0000FF). -The parser will stop just before this range overflows, or when -the maximum of three digits is reached. The upper bits of the Unicode -character will be 0. +Up to three octal digits form an eight-bit value for a Unicode character in the +range \fI0\fR\(en\fI377\fR, i.e. U+0\(enU+FF. Only the digits that result in a +number in this range are consumed. .TP 7 \e\fBx\fIhh\fR . -The hexadecimal digits \fIhh\fR (one or two of them) give an eight-bit -hexadecimal value for the Unicode character that will be inserted. The upper -bits of the Unicode character will be 0 (i.e., the character will be in the -range U+000000\(enU+0000FF). +Up to two hexadecimal digits form an eight-bit value for a Unicode character in +the range \fI0\fR\(en\fIFF\fR. .TP 7 \e\fBu\fIhhhh\fR . -The hexadecimal digits \fIhhhh\fR (one, two, three, or four of them) give a -sixteen-bit hexadecimal value for the Unicode character that will be -inserted. The upper bits of the Unicode character will be 0 (i.e., the -character will be in the range U+000000\(enU+00FFFF). +Up to four hexadecimal digits form a 16-bit value for a Unicode character in +the range \fI0\fR\(en\fIFFFF\fR. .TP 7 \e\fBU\fIhhhhhhhh\fR . -The hexadecimal digits \fIhhhhhhhh\fR (one up to eight of them) give a -twenty-one-bit hexadecimal value for the Unicode character that will be -inserted, in the range U+000000\(enU+10FFFF. The parser will stop just -before this range overflows, or when the maximum of eight digits -is reached. The upper bits of the Unicode character will be 0. -.RE +Up to eight hexadecimal digits form a 21-bit value for a Unicode character in +the range \fI0\fR\(en\fI10FFFF\fR. Only the digits that result in a number in +this range are consumed. .RE -.PP -Backslash substitution is not performed on words enclosed in braces, -except for backslash-newline as described above. .RE -.IP "[10] \fBComments.\fR" -If a hash character -.PQ # -appears at a point where Tcl is -expecting the first character of the first word of a command, -then the hash character and the characters that follow it, up -through the next newline, are treated as a comment and ignored. -The comment character only has significance when it appears -at the beginning of a command. -.IP "[11] \fBOrder of substitution.\fR" -Each character is processed exactly once by the Tcl interpreter -as part of creating the words of a command. -For example, if variable substitution occurs then no further -substitutions are performed on the value of the variable; the -value is inserted into the word verbatim. -If command substitution occurs then the nested command is -processed entirely by the recursive call to the Tcl interpreter; -no substitutions are performed before making the recursive -call and no additional substitutions are performed on the result -of the nested script. -.RS .PP -Substitutions take place from left to right, and each substitution is -evaluated completely before attempting to evaluate the next. Thus, a -sequence like -.PP -.CS -set y [set x 0][incr x][incr x] -.CE -.PP -will always set the variable \fIy\fR to the value, \fI012\fR. .RE -.IP "[12] \fBSubstitution and word boundaries.\fR" -Substitutions do not affect the word boundaries of a command, -except for argument expansion as specified in rule [5]. -For example, during variable substitution the entire value of -the variable becomes part of a single word, even if the variable's -value contains spaces. +. .SH KEYWORDS backslash, command, comment, script, substitution, variable '\" Local Variables: -- cgit v0.12 From 57d266423a5638cbedc01dc406d5af47a146ca20 Mon Sep 17 00:00:00 2001 From: pooryorick Date: Tue, 28 Feb 2023 10:13:25 +0000 Subject: Make the descriptions in doc/Tcl.n more concise and intuitive. --- doc/Tcl.n | 315 ++++++++++++++++++++++++-------------------------------------- 1 file changed, 121 insertions(+), 194 deletions(-) diff --git a/doc/Tcl.n b/doc/Tcl.n index 0f784af..d13f3ea 100644 --- a/doc/Tcl.n +++ b/doc/Tcl.n @@ -1,6 +1,7 @@ '\" '\" Copyright (c) 1993 The Regents of the University of California. '\" Copyright (c) 1994-1996 Sun Microsystems, Inc. +'\" Copyright (c) 2023 Nathan Coulter '\" '\" See the file "license.terms" for information on usage and redistribution '\" of this file, and for a DISCLAIMER OF ALL WARRANTIES. @@ -16,178 +17,152 @@ Summary of Tcl language syntax. .SH DESCRIPTION .PP The following rules define the syntax and semantics of the Tcl language: -.IP "[1] \fBCommands.\fR" -A Tcl script is a string containing one or more commands. -Semi-colons and newlines are command separators unless quoted as -described below. -Close brackets are command terminators during command substitution -(see below) unless quoted. -.IP "[2] \fBEvaluation.\fR" -A command is evaluated in two steps. -First, the Tcl interpreter breaks the command into \fIwords\fR -and performs substitutions as described below. -These substitutions are performed in the same way for all -commands. -Secondly, the first word is used to locate a routine to -carry out the command, and the remaining words of the command are -passed to that routine. -The routine is free to interpret each of its words -in any way it likes, such as an integer, variable name, list, -or Tcl script. -Different commands interpret their words differently. -.IP "[3] \fBWords.\fR" -Words of a command are separated by white space (except for -newlines, which are command separators). -.IP "[4] \fBDouble quotes.\fR" -If the first character of a word is double-quote +. +.IP "[1] \fBScript.\fR" +A script is composed of zero or more commands delimited by semi-colons or +newlines. +.IP "[2] \fBCommand.\fR" +A command is composed of zero or more words delimited by whitespace. The +replacement for a substitution is included verbatim in the word. For example, a +space in the replacement is included in the word rather than becoming a +delimiter, and \fI\\\\\fR becomes a single backslash in the word. Each word is +processed from left to right and each substitution is performed as soon as it +is complete. +For example, the command +.RS +.PP +.CS +set y [set x 0][incr x][incr x] +.CE +.PP +is composed of three words, and sets the value of \fIy\fR to \fI012\fR. +.PP +If hash +.PQ # +is the first character of what would otherwise be the first word of a command, +all characters up to the next newline are ignored. +.RE +. +.IP "[3] \fBBraced word.\fR" +If a word is enclosed in braces +.PQ { +and +.PQ } "" +, the braces are removed and the enclosed characters become the word. No +substitutions are performed. Nested pairs of braces may occur within the word. +A brace preceded by an odd number of backslashes is not considered part of a +pair, and neither brace nor the backslashes are removed from the word. +. +.IP "[4] \fBQuoted word.\fR" +If a word is enclosed in double quotes .PQ \N'34' -then the word is terminated by the next double-quote character. -If semi-colons, close brackets, or white space characters -(including newlines) appear between the quotes then they are treated -as ordinary characters and included in the word. -Command substitution, variable substitution, and backslash substitution -are performed on the characters between the quotes as described below. -The double-quotes are not retained as part of the word. -.IP "[5] \fBArgument expansion.\fR" -If a word starts with the string -.QW {*} -followed by a non-whitespace character, then the leading +, the double quotes are removed and the enclosed characters become the word. +Substitutions are performed. +. +.IP "[5] \fBList.\fR" +A list has the form of a single command. Newline is whitespace, and semicolon +has no special interpretation. There is no script evaluation so there is no +argument expansion, variable substitution, or command substitution: Dollar-sign +and open bracket have no special interpretation, and what would be argument +expansion in a script is invalid in a list. +. +.IP "[6] \fBArgument expansion.\fR" +If .QW {*} -is removed and the rest of the word is parsed and substituted as any other -word. After substitution, the word is parsed as a list (without command or -variable substitutions; backslash substitutions are performed as is normal for -a list and individual internal words may be surrounded by either braces or -double-quote characters), and its words are added to the command being -substituted. For instance, -.QW "cmd a {*}{b [c]} d {*}{$e f {g h}}" +prefixes a word, it is removed. After any remaining enclosing braces or quotes +are processed and applicable substitutions performed, the word, which must +be a list, is removed from the command, and in its place each word in the +list becomes an additional word in the command. For example, +.CS +cmd a {*}{b [c]} d {*}{$e f {g h}} +.CE is equivalent to -.QW "cmd a b {[c]} d {$e} f {g h}" . -.IP "[6] \fBBraces.\fR" -If the first character of a word is an open brace -.PQ { -and rule [5] does not apply, then -the word is terminated by the matching close brace -.PQ } "" . -Braces nest within the word: for each additional open -brace there must be an additional close brace (however, -if an open brace or close brace within the word is -quoted with a backslash then it is not counted in locating the -matching close brace). -No substitutions are performed on the characters between the -braces except for backslash-newline substitutions described -below, nor do semi-colons, newlines, close brackets, -or white space receive any special interpretation. -The word will consist of exactly the characters between the -outer braces, not including the braces themselves. -.IP "[7] \fBCommand substitution.\fR" -If a word contains an open bracket +.CS +cmd a b {[c]} d {$e} f {g h} . +.CE +. +.IP "[7] \fBEvaluation.\fR" +To evaluate a script, an interpreter evaluates each successive command. The +first word identifies a procedure, and the remaining words are passed to that +procedure for further evaluation. The procedure interprets each argument in +its own way, e.g. as an integer, variable name, list, mathematical expression, +script, or in some other arbitrary way. The result of the last command is the +result of the script. +. +.IP "[8] \fBCommand substitution.\fR" +Each pair of brackets .PQ [ -then Tcl performs \fIcommand substitution\fR. -To do this it invokes the Tcl interpreter recursively to process -the characters following the open bracket as a Tcl script. -The script may contain any number of commands and must be terminated -by a close bracket -.PQ ] "" . -The result of the script (i.e. the result of its last command) is -substituted into the word in place of the brackets and all of the -characters between them. -There may be any number of command substitutions in a single word. -Command substitution is not performed on words enclosed in braces. -.IP "[8] \fBVariable substitution.\fR" -If a word contains a dollar-sign +and +.PQ ] "" +encloses a script and is replaced by the result of that script. +.IP "[9] \fBVariable substitution.\fR" +Each of the following forms begins with dollar sign .PQ $ -followed by one of the forms -described below, then Tcl performs \fIvariable -substitution\fR: the dollar-sign and the following characters are -replaced in the word by the value of a variable. -Variable substitution may take any of the following forms: +and is replaced by the value of the identified variable. \fIname\fR names the +variable and is composed of ASCII letters (\fBA\fR\(en\fBZ\fR and +\fBa\fR\(en\fBz\fR), digits (\fB0\fR\(en\fB9\fR), underscores, or namespace +delimiters (two or more colons). \fIindex\fR is the name of an individual +variable within an array variable, and may be empty. .RS .TP 15 \fB$\fIname\fR . -\fIName\fR is the name of a scalar variable; the name is a sequence -of one or more characters that are a letter, digit, underscore, -or namespace separators (two or more colons). -Letters and digits are \fIonly\fR the standard ASCII ones (\fB0\fR\(en\fB9\fR, -\fBA\fR\(en\fBZ\fR and \fBa\fR\(en\fBz\fR). +\fIname\fR may not be empty. + .TP 15 \fB$\fIname\fB(\fIindex\fB)\fR . -\fIName\fR gives the name of an array variable and \fIindex\fR gives -the name of an element within that array. -\fIName\fR must contain only letters, digits, underscores, and -namespace separators, and may be an empty string. -Letters and digits are \fIonly\fR the standard ASCII ones (\fB0\fR\(en\fB9\fR, -\fBA\fR\(en\fBZ\fR and \fBa\fR\(en\fBz\fR). -Command substitutions, variable substitutions, and backslash -substitutions are performed on the characters of \fIindex\fR. +\fIname\fR may be empty. Substitutions are performed on \fIindex\fR. .TP 15 \fB${\fIname\fB}\fR +\fIname\fR may be empty. +.TP 15 +\fB${\fIname(index)\fB}\fR . -\fIName\fR is the name of a scalar variable or array element. It may contain -any characters whatsoever except for close braces. It indicates an array -element if \fIname\fR is in the form -.QW \fIarrayName\fB(\fIindex\fB)\fR -where \fIarrayName\fR does not contain any open parenthesis characters, -.QW \fB(\fR , -or close brace characters, -.QW \fB}\fR , -and \fIindex\fR can be any sequence of characters except for close brace -characters. No further -substitutions are performed during the parsing of \fIname\fR. -.PP -There may be any number of variable substitutions in a single word. -Variable substitution is not performed on words enclosed in braces. -.PP -Note that variables may contain character sequences other than those listed -above, but in that case other mechanisms must be used to access them (e.g., -via the \fBset\fR command's single-argument form). +\fIname\fR may be empty. No substitutions are performed. .RE -.IP "[9] \fBBackslash substitution.\fR" -If a backslash +Variables that are not accessible through one of the forms above may be +accessed through other mechanisms, e.g. the \fBset\fR command. +.IP "[10] \fBBackslash substitution.\fR" +Each backslash .PQ \e -appears within a word then \fIbackslash substitution\fR occurs. -In all cases but those described below the backslash is dropped and -the following character is treated as an ordinary -character and included in the word. -This allows characters such as double quotes, close brackets, -and dollar signs to be included in words without triggering -special processing. -The following table lists the backslash sequences that are -handled specially, along with the value that replaces each sequence. +that is not part of one of the forms listed below is removed, and the next +character is included in the word verbatim, which allows the inclusion of +characters that would normally be interpreted, namely whitespace, braces, +brackets, double quote, dollar sign, and backslash. The following sequences +are replaced as described: .RS .RS .RS .TP 7 \e\fBa\fR -Audible alert (bell) (Unicode U+000007). +Audible alert (bell) (U+7). .TP 7 \e\fBb\fR -Backspace (Unicode U+000008). +Backspace (U+8). .TP 7 \e\fBf\fR -Form feed (Unicode U+00000C). +Form feed (U+C). .TP 7 \e\fBn\fR -Newline (Unicode U+00000A). +Newline (U+A). .TP 7 \e\fBr\fR -Carriage-return (Unicode U+00000D). +Carriage-return (U+D). .TP 7 \e\fBt\fR -Tab (Unicode U+000009). +Tab (U+9). .TP 7 \e\fBv\fR -Vertical tab (Unicode U+00000B). +Vertical tab (U+B). .TP 7 \e\fB\fIwhiteSpace\fR . -A single space character replaces the backslash, newline, and all spaces -and tabs after the newline. This backslash sequence is unique in that it -is replaced in a separate pre-pass before the command is actually parsed. -This means that it will be replaced even when it occurs between braces, -and the resulting space will be treated as a word separator if it is not -in braces or quotes. +Newline preceded by an odd number of backslashes, along with the consecutive +spaces and tabs that immediately follow it, is replaced by a single space. +Because this happens before the command is split into words, it occurs even +within braced words, and if the resulting space may subsequently be treated as +a word delimiter. .TP 7 \e\e Backslash @@ -195,78 +170,30 @@ Backslash .TP 7 \e\fIooo\fR . -The digits \fIooo\fR (one, two, or three of them) give a eight-bit octal -value for the Unicode character that will be inserted, in the range -\fI000\fR\(en\fI377\fR (i.e., the range U+000000\(enU+0000FF). -The parser will stop just before this range overflows, or when -the maximum of three digits is reached. The upper bits of the Unicode -character will be 0. +Up to three octal digits form an eight-bit value for a Unicode character in the +range \fI0\fR\(en\fI377\fR, i.e. U+0\(enU+FF. Only the digits that result in a +number in this range are consumed. .TP 7 \e\fBx\fIhh\fR . -The hexadecimal digits \fIhh\fR (one or two of them) give an eight-bit -hexadecimal value for the Unicode character that will be inserted. The upper -bits of the Unicode character will be 0 (i.e., the character will be in the -range U+000000\(enU+0000FF). +Up to two hexadecimal digits form an eight-bit value for a Unicode character in +the range \fI0\fR\(en\fIFF\fR. .TP 7 \e\fBu\fIhhhh\fR . -The hexadecimal digits \fIhhhh\fR (one, two, three, or four of them) give a -sixteen-bit hexadecimal value for the Unicode character that will be -inserted. The upper bits of the Unicode character will be 0 (i.e., the -character will be in the range U+000000\(enU+00FFFF). +Up to four hexadecimal digits form a 16-bit value for a Unicode character in +the range \fI0\fR\(en\fIFFFF\fR. .TP 7 \e\fBU\fIhhhhhhhh\fR . -The hexadecimal digits \fIhhhhhhhh\fR (one up to eight of them) give a -twenty-one-bit hexadecimal value for the Unicode character that will be -inserted, in the range U+000000\(enU+10FFFF. The parser will stop just -before this range overflows, or when the maximum of eight digits -is reached. The upper bits of the Unicode character will be 0. -.RE +Up to eight hexadecimal digits form a 21-bit value for a Unicode character in +the range \fI0\fR\(en\fI10FFFF\fR. Only the digits that result in a number in +this range are consumed. .RE -.PP -Backslash substitution is not performed on words enclosed in braces, -except for backslash-newline as described above. .RE -.IP "[10] \fBComments.\fR" -If a hash character -.PQ # -appears at a point where Tcl is -expecting the first character of the first word of a command, -then the hash character and the characters that follow it, up -through the next newline, are treated as a comment and ignored. -The comment character only has significance when it appears -at the beginning of a command. -.IP "[11] \fBOrder of substitution.\fR" -Each character is processed exactly once by the Tcl interpreter -as part of creating the words of a command. -For example, if variable substitution occurs then no further -substitutions are performed on the value of the variable; the -value is inserted into the word verbatim. -If command substitution occurs then the nested command is -processed entirely by the recursive call to the Tcl interpreter; -no substitutions are performed before making the recursive -call and no additional substitutions are performed on the result -of the nested script. -.RS .PP -Substitutions take place from left to right, and each substitution is -evaluated completely before attempting to evaluate the next. Thus, a -sequence like -.PP -.CS -set y [set x 0][incr x][incr x] -.CE -.PP -will always set the variable \fIy\fR to the value, \fI012\fR. .RE -.IP "[12] \fBSubstitution and word boundaries.\fR" -Substitutions do not affect the word boundaries of a command, -except for argument expansion as specified in rule [5]. -For example, during variable substitution the entire value of -the variable becomes part of a single word, even if the variable's -value contains spaces. +. .SH KEYWORDS backslash, command, comment, script, substitution, variable '\" Local Variables: -- cgit v0.12 From bb2f06fc739eb91a9f1499fbfbc4fa346172660e Mon Sep 17 00:00:00 2001 From: pooryorick Date: Tue, 28 Feb 2023 11:42:15 +0000 Subject: Reverted [d156af9fb76dd2f4] and removed tests io-52.20 io-75.6 io-75.7, as this commit, intended to fix issue [b8f575aa2398b0e4], breaks the semantics of [read] and [gets]. Such a change would require an accepted TIP. See [b8f575aa2398b0e4] for further discussion. jn: @pouryorick See [b8f575aa2398b0e4] for the reason why this commit is not appropriate: It gets core-8-branch back in the buggy state it was, without even providing a real solution everyone agrees on. You shouldn't revert my patch just because I reverted yours. pooryorick: As I explained, the reason for this reversion is that it hard-codes an unapproved change in the semantics of [read] and [gets] into the test suite. Jan, your statement that it's a "revenge" reversion is false. I spent a month trying to find some alternative to this reversion before actually performing it. A commit that codifes in its tests changes in semantcs to [read]/[gets] simply shouldn't be on core-8-branch. --- generic/tclIO.c | 4 ++-- tests/io.test | 63 --------------------------------------------------------- 2 files changed, 2 insertions(+), 65 deletions(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index 2e0cd1f..c96a406 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -7588,7 +7588,7 @@ Tcl_Eof( ChannelState *statePtr = ((Channel *) chan)->state; /* State of real channel structure. */ - return (GotFlag(statePtr, CHANNEL_EOF) && !GotFlag(statePtr, CHANNEL_ENCODING_ERROR)) ? 1 : 0; + return GotFlag(statePtr, CHANNEL_EOF) ? 1 : 0; } /* @@ -8283,7 +8283,7 @@ Tcl_SetChannelOption( statePtr->inputEncodingFlags = TCL_ENCODING_START; statePtr->outputEncodingState = NULL; statePtr->outputEncodingFlags = TCL_ENCODING_START; - ResetFlag(statePtr, CHANNEL_NEED_MORE_DATA|CHANNEL_ENCODING_ERROR); + ResetFlag(statePtr, CHANNEL_NEED_MORE_DATA); UpdateInterest(chanPtr); return TCL_OK; } else if (HaveOpt(2, "-eofchar")) { diff --git a/tests/io.test b/tests/io.test index 865ff7e..6821ff3 100644 --- a/tests/io.test +++ b/tests/io.test @@ -7609,27 +7609,6 @@ test io-52.19 {coverage of eofChar handling} { close $out file size $path(test2) } 8 -test io-52.20 {TclCopyChannel & encodings} -setup { - set out [open $path(utf8-fcopy.txt) w] - fconfigure $out -encoding utf-8 -translation lf - puts $out "Á" - close $out -} -constraints {fcopy} -body { - # binary to encoding => the input has to be - # in utf-8 to make sense to the encoder - - set in [open $path(utf8-fcopy.txt) r] - set out [open $path(kyrillic.txt) w] - - # Using "-encoding ascii" means reading the "Á" gives an error - fconfigure $in -encoding ascii -strictencoding 1 - fconfigure $out -encoding koi8-r -translation lf - - fcopy $in $out -} -cleanup { - close $in - close $out -} -returnCodes 1 -match glob -result {error reading "file*": illegal byte sequence} test io-52.21 {TclCopyChannel & encodings} -setup { set out [open $path(utf8-fcopy.txt) w] fconfigure $out -encoding utf-8 -translation lf @@ -9143,48 +9122,6 @@ test io-75.5 {invalid utf-8 encoding read is ignored (-nocomplainencoding 1)} -s removeFile io-75.5 } -result 4181 -test io-75.6 {invalid utf-8 encoding read is not ignored (-strictencoding 1)} -setup { - set fn [makeFile {} io-75.6] - set f [open $fn w+] - fconfigure $f -encoding binary - # \x81 is invalid in utf-8 - puts -nonewline $f A\x81 - flush $f - seek $f 0 - fconfigure $f -encoding utf-8 -buffering none -eofchar "" -translation lf -strictencoding 1 -} -body { - set d [read $f] - binary scan $d H* hd - lappend hd [catch {read $f} msg] - close $f - lappend hd $msg -} -cleanup { - removeFile io-75.6 -} -match glob -result {41 1 {error reading "*": illegal byte sequence}} - -test io-75.7 {invalid utf-8 encoding eof handling (-strictencoding 1)} -setup { - set fn [makeFile {} io-75.7] - set f [open $fn w+] - fconfigure $f -encoding binary - # \xA1 is invalid in utf-8. -eofchar is not detected, because it comes later. - puts -nonewline $f A\xA1\x1A - flush $f - seek $f 0 - fconfigure $f -encoding utf-8 -buffering none -eofchar \x1A -translation lf -strictencoding 1 -} -body { - set d [read $f] - binary scan $d H* hd - lappend hd [eof $f] - lappend hd [catch {read $f} msg] - lappend hd $msg - fconfigure $f -encoding iso8859-1 - lappend hd [read $f];# We changed encoding, so now we can read the \xA1 - close $f - set hd -} -cleanup { - removeFile io-75.7 -} -match glob -result {41 0 1 {error reading "*": illegal byte sequence} ¡} - test io-75.8 {invalid utf-8 encoding eof handling (-strictencoding 1)} -setup { set fn [makeFile {} io-75.8] set f [open $fn w+] -- cgit v0.12 From 7661972907da4bc9f9d49d64cf6db1c0748936f9 Mon Sep 17 00:00:00 2001 From: pooryorick Date: Tue, 28 Feb 2023 11:57:03 +0000 Subject: Reverted [d156af9fb76dd2f4] and removed tests io-52.20 io-75.6 io-75.7, as this commit, intended to fix issue [b8f575aa2398b0e4], breaks the semantics of [read] and [gets]. Such a change would require an accepted TIP. See [b8f575aa2398b0e4] for further discussion. jn: @pouryorick See [b8f575aa2398b0e4] for the reason why this commit is not appropriate: It gets core-8-branch back in the buggy state it was, without even providing a real solution everyone agrees on. You shouldn't revert my patch just because I reverted yours. pooryorick: As I explained, the reason for this reversion is that it hard-codes an unapproved change in the semantics of [read] and [gets] into the test suite. Jan, your statement that it's a "revenge" reversion is false. I spent a month trying to find some alternative to this reversion before actually performing it. A commit that codifes in its tests changes in semantcs to [read]/[gets] simply shouldn't be on core-8-branch. --- generic/tclIO.c | 4 ++-- tests/io.test | 63 --------------------------------------------------------- 2 files changed, 2 insertions(+), 65 deletions(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index 768a5c5..36889c7 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -7552,7 +7552,7 @@ Tcl_Eof( ChannelState *statePtr = ((Channel *) chan)->state; /* State of real channel structure. */ - return (GotFlag(statePtr, CHANNEL_EOF) && !GotFlag(statePtr, CHANNEL_ENCODING_ERROR)) ? 1 : 0; + return GotFlag(statePtr, CHANNEL_EOF) ? 1 : 0; } /* @@ -8223,7 +8223,7 @@ Tcl_SetChannelOption( statePtr->inputEncodingFlags = TCL_ENCODING_START; statePtr->outputEncodingState = NULL; statePtr->outputEncodingFlags = TCL_ENCODING_START; - ResetFlag(statePtr, CHANNEL_NEED_MORE_DATA|CHANNEL_ENCODING_ERROR); + ResetFlag(statePtr, CHANNEL_NEED_MORE_DATA); UpdateInterest(chanPtr); return TCL_OK; } else if (HaveOpt(2, "-eofchar")) { diff --git a/tests/io.test b/tests/io.test index c245add..59b6c66 100644 --- a/tests/io.test +++ b/tests/io.test @@ -7692,27 +7692,6 @@ test io-52.19 {coverage of eofChar handling} { close $out file size $path(test2) } 8 -test io-52.20 {TclCopyChannel & encodings} -setup { - set out [open $path(utf8-fcopy.txt) w] - fconfigure $out -encoding utf-8 -translation lf - puts $out "Á" - close $out -} -constraints {fcopy} -body { - # binary to encoding => the input has to be - # in utf-8 to make sense to the encoder - - set in [open $path(utf8-fcopy.txt) r] - set out [open $path(kyrillic.txt) w] - - # Using "-encoding ascii" means reading the "Á" gives an error - fconfigure $in -encoding ascii -strictencoding 1 - fconfigure $out -encoding koi8-r -translation lf - - fcopy $in $out -} -cleanup { - close $in - close $out -} -returnCodes 1 -match glob -result {error reading "file*": illegal byte sequence} test io-52.21 {TclCopyChannel & encodings} -setup { set out [open $path(utf8-fcopy.txt) w] fconfigure $out -encoding utf-8 -translation lf @@ -9223,48 +9202,6 @@ test io-75.5 {invalid utf-8 encoding read is ignored (-nocomplainencoding 1)} -s removeFile io-75.5 } -result 4181 -test io-75.6 {invalid utf-8 encoding read is not ignored (-strictencoding 1)} -setup { - set fn [makeFile {} io-75.6] - set f [open $fn w+] - fconfigure $f -encoding binary - # \x81 is invalid in utf-8 - puts -nonewline $f A\x81 - flush $f - seek $f 0 - fconfigure $f -encoding utf-8 -buffering none -eofchar "" -translation lf -strictencoding 1 -} -body { - set d [read $f] - binary scan $d H* hd - lappend hd [catch {read $f} msg] - close $f - lappend hd $msg -} -cleanup { - removeFile io-75.6 -} -match glob -result {41 1 {error reading "*": illegal byte sequence}} - -test io-75.7 {invalid utf-8 encoding eof handling (-strictencoding 1)} -setup { - set fn [makeFile {} io-75.7] - set f [open $fn w+] - fconfigure $f -encoding binary - # \xA1 is invalid in utf-8. -eofchar is not detected, because it comes later. - puts -nonewline $f A\xA1\x1A - flush $f - seek $f 0 - fconfigure $f -encoding utf-8 -buffering none -eofchar \x1A -translation lf -strictencoding 1 -} -body { - set d [read $f] - binary scan $d H* hd - lappend hd [eof $f] - lappend hd [catch {read $f} msg] - lappend hd $msg - fconfigure $f -encoding iso8859-1 - lappend hd [read $f];# We changed encoding, so now we can read the \xA1 - close $f - set hd -} -cleanup { - removeFile io-75.7 -} -match glob -result {41 0 1 {error reading "*": illegal byte sequence} ¡} - test io-75.8 {invalid utf-8 encoding eof handling (-strictencoding 1)} -setup { set fn [makeFile {} io-75.8] set f [open $fn w+] -- cgit v0.12 From 70e40dd53dda7e7fca32fc8aec28cf6504e7ffec Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Tue, 28 Feb 2023 12:16:10 +0000 Subject: Restore previous behavior for non-blocking mode, as for this mode the semantics of [read]/[gets] were not broken. This was the 'some agreement'. The change in line 8286 is necessary for both blocking and non-blocking mode: Whenver the encoding change we need to reset the CHANNEL_ENCODING_ERROR flag. --- generic/tclIO.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index c96a406..ae09690 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -7588,6 +7588,10 @@ Tcl_Eof( ChannelState *statePtr = ((Channel *) chan)->state; /* State of real channel structure. */ + if (GotFlag(statePtr, CHANNEL_NONBLOCKING) + && GotFlag(statePtr, CHANNEL_ENCODING_ERROR) { + return 0; + } return GotFlag(statePtr, CHANNEL_EOF) ? 1 : 0; } @@ -8283,7 +8287,7 @@ Tcl_SetChannelOption( statePtr->inputEncodingFlags = TCL_ENCODING_START; statePtr->outputEncodingState = NULL; statePtr->outputEncodingFlags = TCL_ENCODING_START; - ResetFlag(statePtr, CHANNEL_NEED_MORE_DATA); + ResetFlag(statePtr, CHANNEL_NEED_MORE_DATA|CHANNEL_ENCODING_ERROR); UpdateInterest(chanPtr); return TCL_OK; } else if (HaveOpt(2, "-eofchar")) { -- cgit v0.12 From bb80189110af1df66a42c1d79285638ebc54f038 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Tue, 28 Feb 2023 12:43:53 +0000 Subject: Missing ')' --- generic/tclIO.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index ae09690..6da1345 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -7589,7 +7589,7 @@ Tcl_Eof( /* State of real channel structure. */ if (GotFlag(statePtr, CHANNEL_NONBLOCKING) - && GotFlag(statePtr, CHANNEL_ENCODING_ERROR) { + && GotFlag(statePtr, CHANNEL_ENCODING_ERROR)) { return 0; } return GotFlag(statePtr, CHANNEL_EOF) ? 1 : 0; -- cgit v0.12 From 4227ef37bbd67235ead035bb544b3bcf864b1e87 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Tue, 28 Feb 2023 13:33:04 +0000 Subject: Put back testcase io-52.20, and re-fix [4a7397e0b3] --- generic/tclIO.c | 7 ++++++- generic/tclIO.h | 2 ++ tests/io.test | 21 +++++++++++++++++++++ 3 files changed, 29 insertions(+), 1 deletion(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index 6da1345..da06171 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -7588,7 +7588,7 @@ Tcl_Eof( ChannelState *statePtr = ((Channel *) chan)->state; /* State of real channel structure. */ - if (GotFlag(statePtr, CHANNEL_NONBLOCKING) + if (GotFlag(statePtr, CHANNEL_NONBLOCKING|CHANNEL_FCOPY) && GotFlag(statePtr, CHANNEL_ENCODING_ERROR)) { return 0; } @@ -9803,6 +9803,7 @@ CopyData( * the bottom of the stack. */ + SetFlag(inStatePtr, CHANNEL_FCOPY); inBinary = (inStatePtr->encoding == NULL); outBinary = (outStatePtr->encoding == NULL); sameEncoding = inStatePtr->encoding == outStatePtr->encoding @@ -9918,6 +9919,7 @@ CopyData( TclDecrRefCount(bufObj); bufObj = NULL; } + ResetFlag(inStatePtr, CHANNEL_FCOPY); return TCL_OK; } } @@ -10009,6 +10011,7 @@ CopyData( TclDecrRefCount(bufObj); bufObj = NULL; } + ResetFlag(inStatePtr, CHANNEL_FCOPY); return TCL_OK; } @@ -10031,6 +10034,7 @@ CopyData( TclDecrRefCount(bufObj); bufObj = NULL; } + ResetFlag(inStatePtr, CHANNEL_FCOPY); return TCL_OK; } } /* while */ @@ -10083,6 +10087,7 @@ CopyData( } } } + ResetFlag(inStatePtr, CHANNEL_FCOPY); return result; } diff --git a/generic/tclIO.h b/generic/tclIO.h index a69e990..689067f 100644 --- a/generic/tclIO.h +++ b/generic/tclIO.h @@ -233,6 +233,8 @@ typedef struct ChannelState { * flushed after every newline. */ #define CHANNEL_UNBUFFERED (1<<5) /* Output to the channel must always * be flushed immediately. */ +#define CHANNEL_FCOPY (1<<6) /* Channel is currently doing an fcopy + * mode. */ #define BG_FLUSH_SCHEDULED (1<<7) /* A background flush of the queued * output buffers has been * scheduled. */ diff --git a/tests/io.test b/tests/io.test index 6821ff3..dd291dd 100644 --- a/tests/io.test +++ b/tests/io.test @@ -7609,6 +7609,27 @@ test io-52.19 {coverage of eofChar handling} { close $out file size $path(test2) } 8 +test io-52.20 {TclCopyChannel & encodings} -setup { + set out [open $path(utf8-fcopy.txt) w] + fconfigure $out -encoding utf-8 -translation lf + puts $out "Á" + close $out +} -constraints {fcopy} -body { + # binary to encoding => the input has to be + # in utf-8 to make sense to the encoder + + set in [open $path(utf8-fcopy.txt) r] + set out [open $path(kyrillic.txt) w] + + # Using "-encoding ascii" means reading the "Á" gives an error + fconfigure $in -encoding ascii -strictencoding 1 + fconfigure $out -encoding koi8-r -translation lf + + fcopy $in $out +} -cleanup { + close $in + close $out +} -returnCodes 1 -match glob -result {error reading "file*": illegal byte sequence} test io-52.21 {TclCopyChannel & encodings} -setup { set out [open $path(utf8-fcopy.txt) w] fconfigure $out -encoding utf-8 -translation lf -- cgit v0.12