From c2fab9a915fccf8f4fa9f6f4b41c06332d8bedbf Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Wed, 21 Aug 2024 12:06:34 +0000 Subject: Start on bug [945d2387d7] --- generic/tclEncoding.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 64fb1b6..95acfa9 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -2943,6 +2943,9 @@ Utf16ToUtfProc( *dst++ = (ch & 0xFF); } else if (HIGH_SURROGATE(prev) || HIGH_SURROGATE(ch)) { dst += Tcl_UniCharToUtf(ch | TCL_COMBINE, dst); + if (HIGH_SURROGATE(prev) && LOW_SURROGATE(ch)) { + --numChars; /* Character has been combined, so compensage count */ + } } else if (LOW_SURROGATE(ch) && !PROFILE_TCL8(flags)) { /* Lo surrogate not preceded by Hi surrogate and not tcl8 profile */ if (PROFILE_STRICT(flags)) { -- cgit v0.12 From 123458fa6b9487cee12e4c1952c59b125b2274ed Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Thu, 22 Aug 2024 18:12:15 +0000 Subject: Proposed fixes for [945d2387d7], [f2e924e881], [33f9b4de51] --- generic/tclEncoding.c | 222 +++++++++++++++++++++++++++++++------------------- 1 file changed, 140 insertions(+), 82 deletions(-) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index d235911..cb5dd20 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -3009,6 +3009,7 @@ Utf16ToUtfProc( srcLen--; } +#if 0 /* * If last code point is a high surrogate, we cannot handle that yet, * unless we are at the end. @@ -3019,6 +3020,7 @@ Utf16ToUtfProc( result = TCL_CONVERT_MULTIBYTE; srcLen-= 2; } +#endif srcStart = src; srcEnd = src + srcLen; @@ -3028,7 +3030,7 @@ Utf16ToUtfProc( for (numChars = 0; src < srcEnd && numChars <= charLimit; src += 2, numChars++) { - if (dst > dstEnd) { + if (dst > dstEnd && !HIGH_SURROGATE(ch)) { result = TCL_CONVERT_NOSPACE; break; } @@ -3039,94 +3041,150 @@ Utf16ToUtfProc( } else { ch = (src[0] & 0xFF) << 8 | (src[1] & 0xFF); } - if (HIGH_SURROGATE(prev) && !LOW_SURROGATE(ch)) { - if (PROFILE_STRICT(flags)) { - result = TCL_CONVERT_SYNTAX; - src -= 2; /* Go back to beginning of high surrogate */ - dst--; /* Also undo writing a single byte too much */ - numChars--; - break; - } else if (PROFILE_REPLACE(flags)) { - /* - * Previous loop wrote a single byte to mark the high surrogate. - * Replace it with the replacement character. Further, restart - * current loop iteration since need to recheck destination - * space and reset processing of current character. - */ - ch = UNICODE_REPLACE_CHAR; - dst--; - dst += Tcl_UniCharToUtf(ch, dst); - src -= 2; - numChars--; - continue; - } else { - /* - * Bug [10c2c17c32]. If Hi surrogate not followed by Lo - * surrogate, finish 3-byte UTF-8 - */ - dst += Tcl_UniCharToUtf(-1, dst); - } - } - - /* - * Special case for 1-byte utf chars for speed. Make sure we work with - * unsigned short-size data. - */ - - if ((unsigned)ch - 1 < 0x7F) { - *dst++ = (ch & 0xFF); - } else if (HIGH_SURROGATE(prev) || HIGH_SURROGATE(ch)) { - dst += Tcl_UniCharToUtf(ch | TCL_COMBINE, dst); - if (HIGH_SURROGATE(prev) && LOW_SURROGATE(ch)) { - --numChars; /* Character has been combined, so compensage count */ + if (HIGH_SURROGATE(prev)) { + if (LOW_SURROGATE(ch)) { + /* + * High surrogate was followed by a low surrogate. + * Tcl_UniCharToUtf would have stashed away the state in dst. + * Call it again to combine that state with the low surrogate. + * We also have to compensate the numChars as two UTF-16 units + * have been combined into one character. + */ + dst += Tcl_UniCharToUtf(ch | TCL_COMBINE, dst); + --numChars; + } else { + /* High surrogate was not followed by a low surrogate */ + if (PROFILE_STRICT(flags)) { + result = TCL_CONVERT_SYNTAX; + src -= 2; /* Go back to beginning of high surrogate */ + dst--; /* Also undo writing a single byte too much */ + numChars--; + break; + } + if (PROFILE_REPLACE(flags)) { + /* + * Previous loop wrote a single byte to mark the high surrogate. + * Replace it with the replacement character. Further, restart + * current loop iteration since need to recheck destination + * space and reset processing of current character. + */ + ch = UNICODE_REPLACE_CHAR; + dst--; + dst += Tcl_UniCharToUtf(ch, dst); + } else { + /* + * Bug [10c2c17c32]. If Hi surrogate not followed by Lo + * surrogate, finish 3-byte UTF-8 + */ + dst += Tcl_UniCharToUtf(-1, dst); + } + /* Loop around again so destination space and other checks are done */ + prev = 0; /* Reset high surrogate tracker */ + src -= 2; + numChars--; } - } else if (LOW_SURROGATE(ch) && !PROFILE_TCL8(flags)) { - /* Lo surrogate not preceded by Hi surrogate and not tcl8 profile */ - if (PROFILE_STRICT(flags)) { - result = TCL_CONVERT_SYNTAX; - break; - } else { - /* PROFILE_REPLACE */ - dst += Tcl_UniCharToUtf(UNICODE_REPLACE_CHAR, dst); - } } else { - dst += Tcl_UniCharToUtf(ch, dst); - } - } + /* Previous char was not a high surrogate */ - if (HIGH_SURROGATE(ch)) { - if (PROFILE_STRICT(flags)) { - result = TCL_CONVERT_SYNTAX; - src -= 2; - dst--; - numChars--; - } else if (PROFILE_REPLACE(flags)) { - dst--; - dst += Tcl_UniCharToUtf(UNICODE_REPLACE_CHAR, dst); - } else { - /* Bug [10c2c17c32]. If Hi surrogate, finish 3-byte UTF-8 */ - dst += Tcl_UniCharToUtf(-1, dst); - } + /* + * Special case for 1-byte utf chars for speed. Make sure we work with + * unsigned short-size data. Order checks based on expected frequency. + */ + if ((unsigned)ch - 1 < 0x7F) { + /* ASCII except nul */ + *dst++ = (ch & 0xFF); + } else if (!SURROGATE(ch)) { + /* Not ASCII, not surrogate */ + dst += Tcl_UniCharToUtf(ch, dst); + } else if (HIGH_SURROGATE(ch)) { + dst += Tcl_UniCharToUtf(ch | TCL_COMBINE, dst); + } else { + assert(LOW_SURROGATE(ch)); + if (PROFILE_STRICT(flags)) { + result = TCL_CONVERT_SYNTAX; + break; + } + if (PROFILE_REPLACE(flags)) { + ch = UNICODE_REPLACE_CHAR; + } + dst += Tcl_UniCharToUtf(ch, dst); + } + } } /* - * If we had a truncated code unit at the end AND this is the last - * fragment AND profile is not "strict", stick FFFD in its place. + * When the above loop ends, result may have the following values: + * 1. TCL_OK - full source buffer was completely processed. + * src, dst, numChars will hold values up to that point BUT + * there may be a leftover high surrogate we need to deal with. + * 2. TCL_CONVERT_NOSPACE - Ran out of room in the destination buffer. + * Same considerations as (1) + * 3. TCL_CONVERT_SYNTAX - decoding error. src, dst, numChars will + * hold the correct values up to the point of error even if the + * the last character decoded was a high surrogate. + * 4. TCL_CONVERT_MULTIBYTE - the buffer passed in was not fully + * processed, because there was a trailing single byte. However, + * we may have processed the requested number of characters already + * in which case the trailing byte does not matter. We still + * may still be a leftover high surrogate as in (1) and (2). */ - if ((flags & TCL_ENCODING_END) && (result == TCL_CONVERT_MULTIBYTE)) { - if (dst > dstEnd) { - result = TCL_CONVERT_NOSPACE; - } else { - if (PROFILE_STRICT(flags)) { - result = TCL_CONVERT_SYNTAX; - } else { - /* PROFILE_REPLACE or PROFILE_TCL8 */ - result = TCL_OK; - dst += Tcl_UniCharToUtf(UNICODE_REPLACE_CHAR, dst); - numChars++; - src++; /* Go past truncated code unit */ - } - } + switch (result) { + case TCL_CONVERT_MULTIBYTE: /* FALLTHRU */ + case TCL_OK: /* FALLTHRU */ + case TCL_CONVERT_NOSPACE: + if (HIGH_SURROGATE(ch)) { + if (flags & TCL_ENCODING_END) { + /* + * No more data expected. There will be space for output of + * one character (essentially overwriting the dst area holding + * high surrogate state) + */ + assert((dst-1) <= dstEnd); + if (PROFILE_STRICT(flags)) { + result = TCL_CONVERT_SYNTAX; + src -= 2; + dst--; + numChars--; + } else if (PROFILE_REPLACE(flags)) { + dst--; + dst += Tcl_UniCharToUtf(UNICODE_REPLACE_CHAR, dst); + } else { + /* Bug [10c2c17c32]. If Hi surrogate, finish 3-byte UTF-8 */ + dst += Tcl_UniCharToUtf(-1, dst); + } + } else { + /* More data is expected. Revert the surrogate state */ + src -= 2; + dst--; + numChars--; + /* Note: leave result of TCL_CONVERT_NOSPACE as is */ + if (result == TCL_OK) { + result = TCL_CONVERT_MULTIBYTE; + } + } + } else if ((flags & TCL_ENCODING_END) && (result == TCL_CONVERT_MULTIBYTE)) { + /* + * If we had a trailing byte at the end AND this is the last + * fragment AND profile is not "strict", stick FFFD in its place. + * Note in this case we DO need to check for room in dst. + */ + if (dst > dstEnd) { + result = TCL_CONVERT_NOSPACE; + } else { + if (PROFILE_STRICT(flags)) { + result = TCL_CONVERT_SYNTAX; + } else { + /* PROFILE_REPLACE or PROFILE_TCL8 */ + result = TCL_OK; + dst += Tcl_UniCharToUtf(UNICODE_REPLACE_CHAR, dst); + numChars++; + src++; + } + } + } + break; + case TCL_CONVERT_SYNTAX: + break; /* Nothing to do */ } *srcReadPtr = src - srcStart; -- cgit v0.12 From e3f4e1187fee73ffe66ff7924ed2f09b4db5cc61 Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Fri, 23 Aug 2024 06:10:56 +0000 Subject: Reenable utf16 tests after fixes --- generic/tclEncoding.c | 22 +++++++++------------- tests/utfext.test | 4 ++-- 2 files changed, 11 insertions(+), 15 deletions(-) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index cb5dd20..8af87d3 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -3051,25 +3051,22 @@ Utf16ToUtfProc( * have been combined into one character. */ dst += Tcl_UniCharToUtf(ch | TCL_COMBINE, dst); - --numChars; } else { /* High surrogate was not followed by a low surrogate */ if (PROFILE_STRICT(flags)) { result = TCL_CONVERT_SYNTAX; src -= 2; /* Go back to beginning of high surrogate */ dst--; /* Also undo writing a single byte too much */ - numChars--; break; } if (PROFILE_REPLACE(flags)) { /* * Previous loop wrote a single byte to mark the high surrogate. - * Replace it with the replacement character. Further, restart - * current loop iteration since need to recheck destination - * space and reset processing of current character. + * Replace it with the replacement character. */ ch = UNICODE_REPLACE_CHAR; dst--; + numChars++; dst += Tcl_UniCharToUtf(ch, dst); } else { /* @@ -3081,7 +3078,6 @@ Utf16ToUtfProc( /* Loop around again so destination space and other checks are done */ prev = 0; /* Reset high surrogate tracker */ src -= 2; - numChars--; } } else { /* Previous char was not a high surrogate */ @@ -3098,6 +3094,8 @@ Utf16ToUtfProc( dst += Tcl_UniCharToUtf(ch, dst); } else if (HIGH_SURROGATE(ch)) { dst += Tcl_UniCharToUtf(ch | TCL_COMBINE, dst); + /* Do not count this just yet. Compensate for numChars++ in loop counter */ + numChars--; } else { assert(LOW_SURROGATE(ch)); if (PROFILE_STRICT(flags)) { @@ -3119,14 +3117,12 @@ Utf16ToUtfProc( * there may be a leftover high surrogate we need to deal with. * 2. TCL_CONVERT_NOSPACE - Ran out of room in the destination buffer. * Same considerations as (1) - * 3. TCL_CONVERT_SYNTAX - decoding error. src, dst, numChars will - * hold the correct values up to the point of error even if the - * the last character decoded was a high surrogate. + * 3. TCL_CONVERT_SYNTAX - decoding error. * 4. TCL_CONVERT_MULTIBYTE - the buffer passed in was not fully * processed, because there was a trailing single byte. However, - * we may have processed the requested number of characters already + * we *may* have processed the requested number of characters already * in which case the trailing byte does not matter. We still - * may still be a leftover high surrogate as in (1) and (2). + * *may* still be a leftover high surrogate as in (1) and (2). */ switch (result) { case TCL_CONVERT_MULTIBYTE: /* FALLTHRU */ @@ -3144,19 +3140,19 @@ Utf16ToUtfProc( result = TCL_CONVERT_SYNTAX; src -= 2; dst--; - numChars--; } else if (PROFILE_REPLACE(flags)) { dst--; + numChars++; dst += Tcl_UniCharToUtf(UNICODE_REPLACE_CHAR, dst); } else { /* Bug [10c2c17c32]. If Hi surrogate, finish 3-byte UTF-8 */ + numChars++; dst += Tcl_UniCharToUtf(-1, dst); } } else { /* More data is expected. Revert the surrogate state */ src -= 2; dst--; - numChars--; /* Note: leave result of TCL_CONVERT_NOSPACE as is */ if (result == TCL_OK) { result = TCL_CONVERT_MULTIBYTE; diff --git a/tests/utfext.test b/tests/utfext.test index 4b15a8d..01f5184 100644 --- a/tests/utfext.test +++ b/tests/utfext.test @@ -248,8 +248,8 @@ namespace eval utftest { testfragment fromutf $enc $comment $utfhex $hex $internalfragindex # Char limits - note no fromutf as Tcl_UtfToExternal does not support it - if {![string match utf-16* $enc] && $enc ne "cesu-8"} { - # TODO - utf16 hangs + if {$enc ne "cesu-8"} { + # TODO - cesu-8 testcharlimit toutf $enc $comment $hex $utfhex } } -- cgit v0.12 From d472e9ae5561f7d7f6e8b6b2de6efd6a56fa79ba Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Fri, 23 Aug 2024 08:10:20 +0000 Subject: Also fix charlimit for cesu-8 --- generic/tclEncoding.c | 7 ++++++- tests/utfext.test | 5 +---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 8af87d3..3f86857 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -2469,7 +2469,12 @@ UtfToUtfProc( dstStart = dst; flags |= PTR2INT(clientData); - dstEnd = dst + dstLen - ((flags & ENCODING_UTF) ? TCL_UTF_MAX : 6); + + /* + * If output is UTF-8 or encoding for Tcl's internal encoding, + * max space needed is TCL_UTF_MAX. Otherwise, need 6 bytes (CESU-8) + */ + dstEnd = dst + dstLen - ((flags & (ENCODING_INPUT|ENCODING_UTF)) ? TCL_UTF_MAX : 6); /* * Macro to output an isolated high surrogate when it is not followed diff --git a/tests/utfext.test b/tests/utfext.test index 01f5184..d5cec23 100644 --- a/tests/utfext.test +++ b/tests/utfext.test @@ -248,10 +248,7 @@ namespace eval utftest { testfragment fromutf $enc $comment $utfhex $hex $internalfragindex # Char limits - note no fromutf as Tcl_UtfToExternal does not support it - if {$enc ne "cesu-8"} { - # TODO - cesu-8 - testcharlimit toutf $enc $comment $hex $utfhex - } + testcharlimit toutf $enc $comment $hex $utfhex } } -- cgit v0.12 From 653e0a778fe3ba7f9f7341265060bfba4ee5d6b2 Mon Sep 17 00:00:00 2001 From: pooryorick Date: Tue, 27 Aug 2024 10:50:11 +0000 Subject: Fix logic that was inadvertently inverted back in [295715a1b1b2c3d5] --- generic/tclStringObj.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c index 258d02b..f4428d1 100644 --- a/generic/tclStringObj.c +++ b/generic/tclStringObj.c @@ -3191,7 +3191,7 @@ TclStringCat( binary = 0; if (ov > objv+1 && ISCONTINUATION(TclGetString(objPtr))) { forceUniChar = 1; - } else if ((objPtr->typePtr) && TclHasInternalRep(objPtr, &tclStringType)) { + } else if ((objPtr->typePtr) && !TclHasInternalRep(objPtr, &tclStringType)) { /* Prevent shimmer of non-string types. */ allowUniChar = 0; } -- cgit v0.12 From 39c100c9b3ca73d645e13277ab18407815fe59d2 Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Fri, 30 Aug 2024 03:24:46 +0000 Subject: Delete duplicated text fragment in lseq manpage --- doc/lseq.n | 6 ------ 1 file changed, 6 deletions(-) diff --git a/doc/lseq.n b/doc/lseq.n index 9e46f38..a0f3868 100644 --- a/doc/lseq.n +++ b/doc/lseq.n @@ -63,12 +63,6 @@ elements, and if \fIcount\fR is not supplied, it is computed as: \fIcount\fR = int( (\fIend\fR - \fIstart\fR + \fIstep\fR) / \fIstep\fR ) .CE .RE -.PP -The numeric arguments, \fIstart\fR, \fIend\fR, \fIstep\fR, and \fIcount\fR, -can also be a valid expression. the \fBlseq\fR command will evaluate the -expression (as if with \fBexpr\fR) -and use the numeric result, or return an error as with any invalid argument -value; a non-numeric expression result will result in an error. .SH EXAMPLES .CS .\" -- cgit v0.12 From cf8be2a4a6881ce0776abd08e4fafdd63d178523 Mon Sep 17 00:00:00 2001 From: griffin Date: Sun, 1 Sep 2024 22:43:33 +0000 Subject: Fix for ticket [f4a4bd7f1070] - lseq "count" argument also dictates output formatting. Solved by only consdering the start, end, and step values to determine the value formatting of the results. Real numbers can still be used for the Count value, as long as the value is a whole number. --- generic/tclCmdIL.c | 11 +++++++++-- tests/lseq.test | 21 +++++++++++++++++---- 2 files changed, 26 insertions(+), 6 deletions(-) diff --git a/generic/tclCmdIL.c b/generic/tclCmdIL.c index 8c1c162..19a6226 100644 --- a/generic/tclCmdIL.c +++ b/generic/tclCmdIL.c @@ -4139,7 +4139,8 @@ Tcl_LseqObjCmd( Tcl_WideInt values[5]; Tcl_Obj *numValues[5]; Tcl_Obj *numberObj; - int status = TCL_ERROR, keyword, useDoubles = 0, allowedArgs = NumericArg; + int status = TCL_ERROR, keyword, allowedArgs = NumericArg; + int useDoubles = 0; int remNums = 3; Tcl_Obj *arithSeriesPtr; SequenceOperators opmode; @@ -4184,7 +4185,7 @@ Tcl_LseqObjCmd( } numValues[value_i] = numberObj; values[value_i] = keyword; /* TCL_NUMBER_* */ - useDoubles |= (keyword == TCL_NUMBER_DOUBLE) ? 1 : 0; + useDoubles += (keyword == TCL_NUMBER_DOUBLE) ? 1 : 0; value_i++; break; @@ -4213,6 +4214,10 @@ Tcl_LseqObjCmd( elementCount = numValues[0]; end = NULL; step = one; + useDoubles = 0; // Can only have Integer value. If a fractional value + // is given, this will fail later. In other words, + // "3.0" is allowed and used as Integer, but "3.1" + // will be flagged as an error. (bug f4a4bd7f1070) break; /* lseq n n */ @@ -4339,6 +4344,8 @@ Tcl_LseqObjCmd( /* Count needs to be integer, so try to convert if possible */ if (elementCount && TclHasInternalRep(elementCount, &tclDoubleType)) { double d; + // Don't consider Count type to indicate using double values in seqence + useDoubles -= (useDoubles > 0) ? 1 : 0; (void)Tcl_GetDoubleFromObj(NULL, elementCount, &d); if (floor(d) == d) { if ((d >= (double)WIDE_MAX) || (d <= (double)WIDE_MIN)) { diff --git a/tests/lseq.test b/tests/lseq.test index 74fbdfa..6feb940 100644 --- a/tests/lseq.test +++ b/tests/lseq.test @@ -144,10 +144,11 @@ test lseq-1.22 {n n by -n} { lseq 84 66 by -3 } {84 81 78 75 72 69 66} -test lseq-1.23 {consistence, accept double count representable as integer (but use double in series)} { +test lseq-1.23 {consistence, accept double count representable as integer (but use double in series when arguments other than count value are of type double)} { list [lseq 0.0 2.0] [lseq 3.0] [lseq 0 count 3.0] \ [lseq 0.0 count 3.0] [lseq 0 count 3.0 by 1.0] -} [lrepeat 5 {0.0 1.0 2.0}] +} {{0.0 1.0 2.0} {0 1 2} {0 1 2} {0.0 1.0 2.0} {0.0 1.0 2.0}} + test lseq-1.24 {consistence, use double (even if representable as integer) in all variants, if contains a double somewhere} { list [lseq 0.0 2] [lseq 0 2.0] [lseq 0.0 count 3] \ [lseq 0 count 3 by 1.0] [lseq 0 .. 2.0] [lseq 0 to 2 by 1.0] @@ -265,7 +266,7 @@ test lseq-2.19 {expressions as indices} { test lseq-2.20 {expressions as indices, no duplicative eval of expr} { set i 1 list [lseq {[incr i]}] $i [lseq {0 + [incr i]}] $i [lseq {0.0 + [incr i]}] $i -} {{0 1} 2 {0 1 2} 3 {0.0 1.0 2.0 3.0} 4} +} {{0 1} 2 {0 1 2} 3 {0 1 2 3} 4} test lseq-3.0 {expr error: don't swalow expr error (here: divide by zero)} -body { set i 0; lseq {3/$i} @@ -851,7 +852,19 @@ test lseq-bug-578b7e273c03-2 {Arithmetic Series Objects get wrong precision when lappend ll [llength [lseq 0 count 200 by .011]] } -result {100 200 100 200 100 200} - +test lseq-bug-f4a4bd7f1070-1 {} -body { + set result {} + lappend result [catch {lseq 3.1} msg] + lappend result $msg + lappend result [catch {lseq 5 count 3.0} msg] + lappend result $msg + lappend result [lseq 3] + lappend result [lseq 3.0] + lappend result [lseq 5.1e1] + lappend result [string compare [lseq 3] [lseq 3.0]] + set result +} -result {1 {expected integer but got "3.1"} 0 {5 6 7} {0 1 2} {0 1 2} {0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50} 0} + # cleanup ::tcltest::cleanupTests -- cgit v0.12 From 5bd4eed982f50d4dfe8245b2312028a5f12826df Mon Sep 17 00:00:00 2001 From: Torsten Date: Mon, 2 Sep 2024 11:00:42 +0000 Subject: cherrypicked typos, errors and clarifications from the documentation-cleanup-for-transition branch (those marked as 'Fix:') --- doc/ChnlStack.3 | 2 +- doc/CrtChnlHdlr.3 | 4 ++-- doc/apply.n | 2 +- doc/prefix.n | 4 ++-- doc/re_syntax.n | 2 -- doc/tclsh.1 | 6 +++--- doc/zipfs.n | 17 +++++++++++++---- 7 files changed, 22 insertions(+), 15 deletions(-) diff --git a/doc/ChnlStack.3 b/doc/ChnlStack.3 index ba7bc48..3a16c9d 100644 --- a/doc/ChnlStack.3 +++ b/doc/ChnlStack.3 @@ -87,7 +87,7 @@ channels the supplied channel is part of. channels which is just below the supplied channel. .SH "SEE ALSO" -Notifier(3), Tcl_CreateChannel(3), Tcl_OpenFileChannel(3), vwait(n). +Notifier(3), Tcl_CreateChannel(3), Tcl_OpenFileChannel(3), vwait(n) .SH KEYWORDS channel, compression diff --git a/doc/CrtChnlHdlr.3 b/doc/CrtChnlHdlr.3 index 5b0e724..cd817d1 100644 --- a/doc/CrtChnlHdlr.3 +++ b/doc/CrtChnlHdlr.3 @@ -81,6 +81,6 @@ is invoked. For this reason it may be useful to use nonblocking I/O on channels for which there are event handlers. .SH "SEE ALSO" -Notifier(3), Tcl_CreateChannel(3), Tcl_OpenFileChannel(3), vwait(n). +Notifier(3), Tcl_CreateChannel(3), Tcl_OpenFileChannel(3), vwait(n) .SH KEYWORDS -blocking, callback, channel, events, handler, nonblocking. +blocking, callback, channel, events, handler, nonblocking diff --git a/doc/apply.n b/doc/apply.n index 154ddff..be22361 100644 --- a/doc/apply.n +++ b/doc/apply.n @@ -96,7 +96,7 @@ set vbl abc .SH "SEE ALSO" proc(n), uplevel(n) .SH KEYWORDS -anonymous function, argument, lambda, procedure, +anonymous function, argument, lambda, procedure '\" Local Variables: '\" mode: nroff '\" End: diff --git a/doc/prefix.n b/doc/prefix.n index a2180e5..16049f3 100644 --- a/doc/prefix.n +++ b/doc/prefix.n @@ -35,14 +35,14 @@ Returns the longest common prefix of all elements in \fItable\fR that begin with the prefix \fIstring\fR. .\" METHOD: match .TP -\fB::tcl::prefix match\fR ?\fIoptions\fR? \fItable string\fR +\fB::tcl::prefix match\fR ?\fIoption ...\fR? \fItable string\fR . If \fIstring\fR equals one element in \fItable\fR or is a prefix to exactly one element, the matched element is returned. If not, the result depends on the \fB\-error\fR option. (It is recommended that the \fItable\fR be sorted before use with this subcommand, so that the list of matches presented in the error message also becomes sorted, though this is not strictly necessary for -the operation of this subcommand itself.) +the operation of this subcommand itself.) The following options are supported: .RS .\" OPTION: -exact .TP diff --git a/doc/re_syntax.n b/doc/re_syntax.n index 1ece560..b214349 100644 --- a/doc/re_syntax.n +++ b/doc/re_syntax.n @@ -6,8 +6,6 @@ '\" of this file, and for a DISCLAIMER OF ALL WARRANTIES. '\" .so man.macros -.ie '\w'o''\w'\C'^o''' .ds qo \C'^o' -.el .ds qo u .TH re_syntax n "8.1" Tcl "Tcl Built-In Commands" .BS .SH NAME diff --git a/doc/tclsh.1 b/doc/tclsh.1 index 894b21e..1060fa2 100644 --- a/doc/tclsh.1 +++ b/doc/tclsh.1 @@ -166,9 +166,9 @@ VFS. If a file named \fBmain.tcl\fR is present in the top level directory of the zip archive, it will be sourced instead of tclsh's normal command line handing. If a top level directory \fBtcl_library\fR is present in the zip archive, it will become the directory loaded as -env(TCL_LIBRARY). If the file \fBtcl_library/init.tcl\fR is present in the zip -archive, the \fBtcl_library\fR global variable in the initial Tcl interpreter -is set to \fBapp/tcl_library\fR. +env(TCL_LIBRARY). If a file named \fBmain.tcl\fR is present in the top +level directory of the zip archive, it will be sourced instead of the +shell's normal command line handling. .PP Only one zipfile can be concatenated to the end of executable image (tclsh, or wish). However, if multiple zipfiles are diff --git a/doc/zipfs.n b/doc/zipfs.n index 2cf00aa..b0189f6 100644 --- a/doc/zipfs.n +++ b/doc/zipfs.n @@ -185,12 +185,14 @@ stripped prefix) determines the later root name of the archive's content. . Creates an image (potentially a new executable file) similar to \fBzipfs mkzip\fR; see that command for a description of most parameters to this -command, as they behave identically here. +command, as they behave identically here. If \fIoutfile\fR exists, it will +be silently overwritten. .RS .PP If the \fIinfile\fR parameter is specified, this file is prepended in front of the ZIP archive, otherwise the file returned by \fBinfo nameofexecutable\fR -(i.e., the executable file of the running process) is used. If the +(i.e., the executable file of the running process, +typically \fBwish\fR or \fBtclsh\fR) is used. If the \fIpassword\fR parameter is not the empty string, an obfuscated version of that password (see \fBzipfs mkkey\fR) is placed between the image and ZIP chunks of the output file and the contents of the ZIP chunk are protected with that @@ -289,6 +291,10 @@ set base [file join [\fBzipfs root\fR] myApp] \fBzipfs mount\fR $zip $base $password .CE .PP +The following example creates an executable application by appending a ZIP archive +to the tclsh file it was called from and storing the resulting executable in +the file +.QW myApp.bin . When creating an executable image with a password, the password is placed within the executable in a shrouded form so that the application can read files inside the embedded ZIP archive yet casual inspection cannot read it. @@ -300,15 +306,18 @@ set password "hunter2" # Create some simple content to define a basic application file mkdir $appDir -set f [open $appDir/main.tcl] +set f [open $appDir/main.tcl w] puts $f { puts "Hi. This is [info script]" } close $f -# Create the executable +# Create the executable application \fBzipfs mkimg\fR $img $appDir $appDir $password +# remove the now obsolete temporary appDir folder +file delete -force $appDir + # Launch the executable, printing its output to stdout exec $img >@stdout # prints the following line assuming [zipfs root] returns "//zipfs:/": -- cgit v0.12 From 9cf0f1b9b7a2d4dc59e61bacdc8a3db4eca1bc02 Mon Sep 17 00:00:00 2001 From: apnadkarni Date: Wed, 11 Sep 2024 06:31:29 +0000 Subject: Add tcl::idna, expr operators, oo to changes.md --- changes.md | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/changes.md b/changes.md index 615b101..6f9d9c8 100644 --- a/changes.md +++ b/changes.md @@ -71,6 +71,7 @@ writing Tcl scripts. - `tcl::process` - `*::build-info` - `readFile`, `writeFile`, `foreachLine` + - `tcl::idna::*` ## New command options - `clock scan ... -validate ...` @@ -79,6 +80,7 @@ writing Tcl scripts. - `regsub ... -command ...` - `socket ... -nodelay ... -keepalive ...` - `vwait` controlled by several new options + - `expr` string comparators `lt`, `gt`, `le`, `ge` ## Numbers - 0NNN format is no longer octal interpretation. Use 0oNNN. @@ -89,6 +91,9 @@ writing Tcl scripts. - Function int() no longer truncates to word size ## tcl::oo facilities - - private variable and methods + - private variables and methods + - class variables and methods + - abstract and singleton classes + - configurable properties - `method -export`, `method -unexport` -- cgit v0.12 From 1fa64f06e8d6657d5afba87bf89cb59d00987a5d Mon Sep 17 00:00:00 2001 From: Torsten Date: Wed, 11 Sep 2024 07:37:14 +0000 Subject: Unified naming of the Mac to 'macOS' as other names are outdated since 2016 (macOS Sierra, version 10.12) --- doc/FileSystem.3 | 2 +- doc/exec.n | 2 +- doc/file.n | 2 +- doc/filename.n | 4 ++-- doc/glob.n | 6 +++--- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/doc/FileSystem.3 b/doc/FileSystem.3 index b6c6d1e..ae3b022 100644 --- a/doc/FileSystem.3 +++ b/doc/FileSystem.3 @@ -735,7 +735,7 @@ filesystems, so that they can easily retrieve the native (char* or TCHAR*) representation of a path. This function is a convenience wrapper around \fBTcl_FSGetInternalRep\fR. It may be desirable in the future to have non-string-based native representations (for example, -on MacOSX, a representation using a fileSpec of FSRef structure would +on macOS, a representation using a fileSpec of FSRef structure would probably be more efficient). On Windows a full Unicode representation would allow for paths of unlimited length. Currently the representation is simply a character string which may contain either the relative path diff --git a/doc/exec.n b/doc/exec.n index ed1f45d..df9b365 100644 --- a/doc/exec.n +++ b/doc/exec.n @@ -335,7 +335,7 @@ the caller must prepend the desired command with because built-in commands are not implemented using executables. .RE .TP -\fBUnix\fR (including Mac OS X) +\fBUnix\fR (including macOS) . The \fBexec\fR command is fully functional and works as described. .SH "UNIX EXAMPLES" diff --git a/doc/file.n b/doc/file.n index 6c7c7a5..d74819d 100644 --- a/doc/file.n +++ b/doc/file.n @@ -78,7 +78,7 @@ names, the long name is retained. This attribute cannot be set. \fB\-system\fR gives or sets or clears the value of the system attribute of the file. .PP -On Mac OS X and Darwin, \fB\-creator\fR gives or sets the +On macOS and Darwin, \fB\-creator\fR gives or sets the Finder creator type of the file. \fB\-hidden\fR gives or sets or clears the hidden attribute of the file. \fB\-readonly\fR gives or sets or clears the readonly attribute of the file. \fB\-rsrclength\fR gives diff --git a/doc/filename.n b/doc/filename.n index 373a8ee..aee5545 100644 --- a/doc/filename.n +++ b/doc/filename.n @@ -42,7 +42,7 @@ The rules for native names depend on the value reported in the Tcl .TP 10 \fBUnix\fR . -On Unix and Apple MacOS X platforms, Tcl uses path names where the +On Unix and Apple macOS platforms, Tcl uses path names where the components are separated by slashes. Path names may be relative or absolute, and file names may contain any character other than slash. The file names \fB\&.\fR and \fB\&..\fR are special and refer to the @@ -155,7 +155,7 @@ user. If the tilde is followed immediately by a separator, the \fB$HOME\fR environment variable is substituted. Otherwise the characters between the tilde and the next separator are taken as a user name, which is used to retrieve the user's home directory for substitution. This works on -POSIX, MacOS X and Windows platforms. +POSIX, macOS and Windows platforms. .SH "PORTABILITY ISSUES" .PP Not all file systems are case sensitive, so scripts should avoid code diff --git a/doc/glob.n b/doc/glob.n index f93d6e6..45be608 100644 --- a/doc/glob.n +++ b/doc/glob.n @@ -102,11 +102,11 @@ a directory will be returned if \fB\-types d\fR was specified. The second form specifies types where all the types given must match. These are \fIr\fR, \fIw\fR, \fIx\fR as file permissions, and \fIreadonly\fR, \fIhidden\fR as special permission cases. On the -Macintosh, MacOS types and creators are also supported, where any item -which is four characters long is assumed to be a MacOS type +Macintosh, macOS types and creators are also supported, where any item +which is four characters long is assumed to be a macOS type (e.g. \fBTEXT\fR). Items which are of the form \fI{macintosh type XXXX}\fR or \fI{macintosh creator XXXX}\fR will match types or creators -respectively. Unrecognized types, or specifications of multiple MacOS +respectively. Unrecognized types, or specifications of multiple macOS types/creators will signal an error. .PP The two forms may be mixed, so \fB\-types {d f r w}\fR will find all -- cgit v0.12 From cfca71facc6a9c2b33793f16e8acc1717208a012 Mon Sep 17 00:00:00 2001 From: sebres Date: Tue, 17 Sep 2024 15:42:05 +0000 Subject: amend for 9.0 (windows only): since paths starting with ~ are relative in 9.0 for windows, it doesn't need to consider tilde expansion (in opposite to 8.x) --- win/tclWinChan.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/win/tclWinChan.c b/win/tclWinChan.c index f248383..eb41706 100644 --- a/win/tclWinChan.c +++ b/win/tclWinChan.c @@ -1001,16 +1001,13 @@ TclpOpenFileChannel( * We need this just to ensure we return the correct error messages under * some circumstances (relative paths only), so because the normalization * is very expensive, don't invoke it for native or absolute paths. - * Note: since paths starting with ~ are absolute, it also considers tilde expansion, - * (proper error message of tests *io-40.17 "tilde substitution in open") + * Note: since paths starting with ~ are relative in 9.0 for windows, + * it doesn't need to consider tilde expansion (in opposite to 8.x). */ if ( ( - ( !TclFSCwdIsNative() && (Tcl_FSGetPathType(pathPtr) != TCL_PATH_ABSOLUTE) - ) || - (*TclGetString(pathPtr) == '~') /* possible tilde expansion */ ) && Tcl_FSGetNormalizedPath(interp, pathPtr) == NULL ) { -- cgit v0.12 From eb7a5bcf6aef1ccfb4f93b828d03cbe8133bcc88 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Thu, 19 Sep 2024 11:47:35 +0000 Subject: fraktion -> fraction --- doc/clock.n | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/clock.n b/doc/clock.n index e8031de..a28254e 100644 --- a/doc/clock.n +++ b/doc/clock.n @@ -584,7 +584,7 @@ On input, accepts a string of digits (or floating point with the time fraction) and interprets it as an Astronomical Julian Day Number (JDN/JD). The Astronomical Julian Date is a count of the number of calendar days that have elapsed since 1 January, 4713 BCE of the proleptic -Julian calendar, which contains also the time fraktion (after floating point). +Julian calendar, which contains also the time fraction (after floating point). The epoch time of 1 January 1970 corresponds to Astronomical JDN 2440587.5. This value corresponds the julian day used in sqlite-database, and is the same as result of \fBselect julianday(:seconds, 'unixepoch')\fR. @@ -596,7 +596,7 @@ On input, accepts a string of digits (or floating point with the time fraction) and interprets it as a Calendar Julian Day Number. The Calendar Julian Date is a count of the number of calendar days that have elapsed since 1 January, 4713 BCE of the proleptic -Julian calendar, which contains also the time fraktion (after floating point). +Julian calendar, which contains also the time fraction (after floating point). The epoch time of 1 January 1970 corresponds to Astronomical JDN 2440588. .IP \fB%Es\fR This affects similar to \fB%s\fR, but in opposition to \fB%s\fR it parses @@ -642,7 +642,7 @@ On output, produces a two-digit number giving the hour of the day On output, produces a three-digit number giving the day of the year (001-366). On input, accepts such a number. .IP \fB%J\fR -On output, produces a string of digits giving the calendar Julian Day Number. +On output, produces a string of digits giving the Julian Day Number. On input, accepts a string of digits and interprets it as a Julian Day Number. The Julian Day Number is a count of the number of calendar days that have elapsed since 1 January, 4713 BCE of the proleptic @@ -933,7 +933,7 @@ acceptable formats are .QW "\fIdd monthname yy\fR" , .QW "?\fICC\fR?\fIyymmdd\fR" , and -.QW "\fIdd\fB\-\fImonthname\fB\-\fR?\fICC\fR?\fIyy\fR" . +.QW "\fIdd\fB-\fImonthname\fB-\fR?\fICC\fR?\fIyy\fR" . The default year is the current year. If the year is less than 100, we treat the years 00-68 as 2000-2068 and the years 69-99 as 1969-1999. Not all platforms can represent the years 38-70, so -- cgit v0.12 From 87448892f068af3f186b59c162c5870ea786ef56 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Fri, 20 Sep 2024 12:31:31 +0000 Subject: Fix for [e38dce74e2]: Command line built with list not properly quoted. With test-case --- generic/tclListObj.c | 12 +++++++++--- tests/list.test | 5 +++++ 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/generic/tclListObj.c b/generic/tclListObj.c index 975bc2e..22de244 100644 --- a/generic/tclListObj.c +++ b/generic/tclListObj.c @@ -1775,9 +1775,15 @@ Tcl_ListObjAppendList( return TCL_ERROR; } - if (elemCount <= 0) { - /* Nothing to do. Note AFTER check for list above */ - return TCL_OK; + if (elemCount < 0) { + /* + * Note that when elemCount <= 0, this routine is logically a + * no-op, removing and adding no elements to the list. However, by flowing + * through this routine anyway, we get the important side effect that the + * resulting listPtr is a list in canonical form. This is important. + * Resist any temptation to optimize this case. See bug [e38dce74e2] + */ + elemCount = 0; } ListRepElements(&listRep, toLen, toObjv); diff --git a/tests/list.test b/tests/list.test index 905a3d3..f14b0a9 100644 --- a/tests/list.test +++ b/tests/list.test @@ -64,6 +64,11 @@ test list-1.30 {basic null treatment} { set e "\x00abc xyz" string equal $l $e } 1 +test list-1.31 {bug [e38dce74e2]} { + set l #foo + set e {} + list {*}$l {*}$e +} {{#foo}} # For the next round of tests create a list and then pick it apart # with "index" to make sure that we get back exactly what went in. -- cgit v0.12 From c9e31032b076073d522cd19edca9905dd3cbfd58 Mon Sep 17 00:00:00 2001 From: sebres Date: Fri, 20 Sep 2024 20:22:28 +0000 Subject: a bit better form (simply reset the string representation and return, since the object is unshared) --- generic/tclListObj.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/generic/tclListObj.c b/generic/tclListObj.c index 22de244..e7470a9 100644 --- a/generic/tclListObj.c +++ b/generic/tclListObj.c @@ -1775,15 +1775,20 @@ Tcl_ListObjAppendList( return TCL_ERROR; } - if (elemCount < 0) { + if (elemCount <= 0) { /* * Note that when elemCount <= 0, this routine is logically a * no-op, removing and adding no elements to the list. However, by flowing * through this routine anyway, we get the important side effect that the * resulting listPtr is a list in canonical form. This is important. - * Resist any temptation to optimize this case. See bug [e38dce74e2] + * Resist any temptation to optimize this case. See bug [e38dce74e2]. + * No needs to check ListObjIsCanonical, first byte check is enough */ - elemCount = 0; + if (toObj->bytes && *toObj->bytes == '#') { + TclInvalidateStringRep(toObj); + } + /* Nothing to do. Note AFTER check for list above */ + return TCL_OK; } ListRepElements(&listRep, toLen, toObjv); -- cgit v0.12 From 89414d6866d790a432c745dc6626e8c1d9b5e27a Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Sat, 21 Sep 2024 22:35:29 +0000 Subject: Another testcase, which gives a different result in 8.6 --- tests/list.test | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/list.test b/tests/list.test index f14b0a9..a4689ae 100644 --- a/tests/list.test +++ b/tests/list.test @@ -69,6 +69,11 @@ test list-1.31 {bug [e38dce74e2]} { set e {} list {*}$l {*}$e } {{#foo}} +test list-1.32 {bug [e38dce74e2]} { + set l " #foo" + set e {} + list {*}$l {*}$e +} {{#foo}} # For the next round of tests create a list and then pick it apart # with "index" to make sure that we get back exactly what went in. -- cgit v0.12 From 3f90a262e2da5088f3647e4e428731f94dbd6dc4 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Sat, 21 Sep 2024 22:46:21 +0000 Subject: It looks like ListObjIsCanonical() is needed after all --- generic/tclListObj.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/generic/tclListObj.c b/generic/tclListObj.c index e7470a9..08a638e 100644 --- a/generic/tclListObj.c +++ b/generic/tclListObj.c @@ -1778,13 +1778,12 @@ Tcl_ListObjAppendList( if (elemCount <= 0) { /* * Note that when elemCount <= 0, this routine is logically a - * no-op, removing and adding no elements to the list. However, by flowing - * through this routine anyway, we get the important side effect that the + * no-op, removing and adding no elements to the list. However, by removing + * the string representation, we get the important side effect that the * resulting listPtr is a list in canonical form. This is important. * Resist any temptation to optimize this case. See bug [e38dce74e2]. - * No needs to check ListObjIsCanonical, first byte check is enough */ - if (toObj->bytes && *toObj->bytes == '#') { + if (!ListObjIsCanonical(toObj)) { TclInvalidateStringRep(toObj); } /* Nothing to do. Note AFTER check for list above */ -- cgit v0.12