From 05adca525608343e116de898ff6f3b0cd3300429 Mon Sep 17 00:00:00 2001 From: oehhar Date: Fri, 3 Nov 2023 11:25:01 +0000 Subject: Ticket [21b0629c] introduced additional exec quoting for Windows, but did not document it. Here is a proposed documentation. --- doc/exec.n | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/doc/exec.n b/doc/exec.n index d7fd96b..dc1c8c5 100644 --- a/doc/exec.n +++ b/doc/exec.n @@ -234,6 +234,37 @@ processor (\fBcmd.exe /c\fR), because this causes truncation of command-line (also the argument chain) on the first newline character. But it works properly with an executable (using CommandLineToArgv, etc). .PP +\fBVulnerable arguments\fR +.RS +If invoking batch files or other specific programs, the Windows environment +does execute programs mentioned in the arguments or replace environment +variables, which may breake any already existing quoting (for example, if the +environment variable contains a special character like a \fB"\fR). +Examples are: +.CS +% exec my-echo.cmd {test&whoami} + test + mylogin +% exec my-echo.cmd "ENV X:%X%" + ENV X: CONTENT OF X +.CE +This might be seen as a vulnerability. In consequence, the following formatting +is automatically performed on any argument item: +.IP \(bu 3 +Avoid subprogram execution: +Any non-paired special +characters (\fB&\fR, \fB|\fR, \fB^\fR, \fB<\fR, \fB>\fR, \fB!\fR, \fB(\fR, +\fB)\fR, \fB(\fR, \fB%\fR) are automatically enclosed in quotes (\fB"\fR). +.IP \(bu 3 +Avoid environment variable replacement: +Any appearence of environment variable reference (\fB%\fR) is individually quoted +by \fB"\fR. +.PP +This quoting was introduced in TCL 8.6.10 breaking present scripts which rely on +the replacement functionality to avoid. A solution with command parameters is +envisaged for TCL 8.6.14. +.RE +.PP The Tk console text widget does not provide real standard IO capabilities. Under Tk, when redirecting from standard input, all applications will see an immediate end-of-file; information redirected to standard output or standard -- cgit v0.12 From 352f3ff588c3d6ca7b832fd69a88416d9ea0c0f9 Mon Sep 17 00:00:00 2001 From: oehhar Date: Mon, 6 Nov 2023 16:36:58 +0000 Subject: Exec documentation: refine Windows quoting section (thanks, Sergey !) --- doc/exec.n | 30 +++++++++++++++++++----------- 1 file changed, 19 insertions(+), 11 deletions(-) diff --git a/doc/exec.n b/doc/exec.n index dc1c8c5..4024ffe 100644 --- a/doc/exec.n +++ b/doc/exec.n @@ -234,12 +234,17 @@ processor (\fBcmd.exe /c\fR), because this causes truncation of command-line (also the argument chain) on the first newline character. But it works properly with an executable (using CommandLineToArgv, etc). .PP -\fBVulnerable arguments\fR +\fBArgument quoting\fR .RS +Each argument of the \fBexec\fR command is mapped to an argument of the called +program by an adaptive quoting by adding quote characters (\fB"\fR) around the +arguments. +.PP If invoking batch files or other specific programs, the Windows environment does execute programs mentioned in the arguments or replace environment -variables, which may breake any already existing quoting (for example, if the -environment variable contains a special character like a \fB"\fR). +variables, which may have side effects (vulnerabilities) or break any already +existing quoting (for example, if the environment variable contains a special +character like a \fB"\fR). Examples are: .CS % exec my-echo.cmd {test&whoami} @@ -248,21 +253,24 @@ Examples are: % exec my-echo.cmd "ENV X:%X%" ENV X: CONTENT OF X .CE -This might be seen as a vulnerability. In consequence, the following formatting -is automatically performed on any argument item: +In consequence, the following formatting is automatically performed on any +argument item: .IP \(bu 3 Avoid subprogram execution: -Any non-paired special -characters (\fB&\fR, \fB|\fR, \fB^\fR, \fB<\fR, \fB>\fR, \fB!\fR, \fB(\fR, -\fB)\fR, \fB(\fR, \fB%\fR) are automatically enclosed in quotes (\fB"\fR). +Any special character argument containing a special character (\fB&\fR, \fB|\fR, +\fB^\fR, \fB<\fR, \fB>\fR, \fB!\fR, \fB(\fR, \fB)\fR, \fB(\fR, \fB%\fR) +is automatically enclosed in quotes (\fB"\fR). Any data quote is escaped by +appropriate sequences like a double-quote. .IP \(bu 3 Avoid environment variable replacement: Any appearence of environment variable reference (\fB%\fR) is individually quoted by \fB"\fR. .PP -This quoting was introduced in TCL 8.6.10 breaking present scripts which rely on -the replacement functionality to avoid. A solution with command parameters is -envisaged for TCL 8.6.14. +TCL 8.6.10 refined this quoting by adding quoting for data quotes and individual +quoting of "\fB%\fR". +This may break present scripts which rely on the replacement functionality of +environment variables. +A solution with command parameters is envisaged for a future release of TCL. .RE .PP The Tk console text widget does not provide real standard IO capabilities. -- cgit v0.12 From cea34b6d9878e7861e983182282b2905b4a2175f Mon Sep 17 00:00:00 2001 From: oehhar Date: Mon, 6 Nov 2023 16:45:18 +0000 Subject: Remove the quoting example by ". It is more complicated than that, so be quiet. --- doc/exec.n | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/exec.n b/doc/exec.n index 4024ffe..f4a1702 100644 --- a/doc/exec.n +++ b/doc/exec.n @@ -260,7 +260,7 @@ Avoid subprogram execution: Any special character argument containing a special character (\fB&\fR, \fB|\fR, \fB^\fR, \fB<\fR, \fB>\fR, \fB!\fR, \fB(\fR, \fB)\fR, \fB(\fR, \fB%\fR) is automatically enclosed in quotes (\fB"\fR). Any data quote is escaped by -appropriate sequences like a double-quote. +appropriate sequences. .IP \(bu 3 Avoid environment variable replacement: Any appearence of environment variable reference (\fB%\fR) is individually quoted -- cgit v0.12 From 6fd38cd1bcd7cb05f4fb58f8e314ec893354219b Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Wed, 8 Nov 2023 22:33:13 +0000 Subject: Fix [18f2432b0db2bc08]: Tcl_FSSplitPath compiler warning about macros --- generic/tclCmdAH.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/generic/tclCmdAH.c b/generic/tclCmdAH.c index 12216d4..3934b65 100644 --- a/generic/tclCmdAH.c +++ b/generic/tclCmdAH.c @@ -2212,7 +2212,7 @@ PathSplitCmd( Tcl_WrongNumArgs(interp, 1, objv, "name"); return TCL_ERROR; } - res = Tcl_FSSplitPath(objv[1], NULL); + res = Tcl_FSSplitPath(objv[1], (Tcl_Size)NULL); if (res == NULL) { Tcl_SetObjResult(interp, Tcl_ObjPrintf( "could not read \"%s\": no such file or directory", @@ -3109,7 +3109,8 @@ ForeachAssignments( Tcl_Interp *interp, struct ForeachState *statePtr) { - int i, v, k; + int i; + Tcl_Size v, k; Tcl_Obj *valuePtr, *varValuePtr; for (i=0 ; inumLists ; i++) { -- cgit v0.12 From bf71b7786087d4fcf84dbfcd54a9446586f7c52f Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Thu, 9 Nov 2023 08:27:36 +0000 Subject: Wrong type-cast --- generic/tclCmdAH.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/generic/tclCmdAH.c b/generic/tclCmdAH.c index 3934b65..c4b210c 100644 --- a/generic/tclCmdAH.c +++ b/generic/tclCmdAH.c @@ -2212,7 +2212,7 @@ PathSplitCmd( Tcl_WrongNumArgs(interp, 1, objv, "name"); return TCL_ERROR; } - res = Tcl_FSSplitPath(objv[1], (Tcl_Size)NULL); + res = Tcl_FSSplitPath(objv[1], (Tcl_Size *)NULL); if (res == NULL) { Tcl_SetObjResult(interp, Tcl_ObjPrintf( "could not read \"%s\": no such file or directory", -- cgit v0.12 From 4626fe265c1403e3568ae8c3d77bb87a88cc4b25 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Thu, 9 Nov 2023 21:54:57 +0000 Subject: (partial) fix [54a305cb88]: warning: variable set but not used [-Wunused-but-set-variable] --- generic/tclFileName.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/generic/tclFileName.c b/generic/tclFileName.c index b3294fd..d6dac9c 100644 --- a/generic/tclFileName.c +++ b/generic/tclFileName.c @@ -2138,7 +2138,7 @@ DoGlob( Tcl_GlobTypeData *types) /* List object containing list of acceptable * types. May be NULL. */ { - int baseLength, quoted, count; + int baseLength, quoted; int result = TCL_OK; char *name, *p, *openBrace, *closeBrace, *firstSpecialChar; Tcl_Obj *joinedPtr; @@ -2148,7 +2148,6 @@ DoGlob( * past the last initial separator. */ - count = 0; name = pattern; for (; *pattern != '\0'; pattern++) { if (*pattern == '\\') { @@ -2168,7 +2167,6 @@ DoGlob( } else if (strchr(separators, *pattern) == NULL) { break; } - count++; } /* -- cgit v0.12 From af67150caeea40a704056ad8aaa8e793d5fce3ac Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Fri, 10 Nov 2023 08:55:04 +0000 Subject: Recommend Tcl_CreateObjCommand over Tcl_CreateCommand --- generic/tclBasic.c | 21 ++++++++------------- generic/tclNamesp.c | 4 ++-- unix/tclAppInit.c | 2 +- win/tclAppInit.c | 2 +- 4 files changed, 12 insertions(+), 17 deletions(-) diff --git a/generic/tclBasic.c b/generic/tclBasic.c index 0120466..b01717e 100644 --- a/generic/tclBasic.c +++ b/generic/tclBasic.c @@ -1082,14 +1082,9 @@ Tcl_CreateInterp(void) iPtr->deferredCallbacks = NULL; /* - * Create the core commands. Do it here, rather than calling - * Tcl_CreateCommand, because it's faster (there's no need to check for a - * preexisting command by the same name). If a command has a Tcl_CmdProc - * but no Tcl_ObjCmdProc, set the Tcl_ObjCmdProc to - * TclInvokeStringCommand. This is an object-based wrapper function that - * extracts strings, calls the string function, and creates an object for - * the result. Similarly, if a command has a Tcl_ObjCmdProc but no - * Tcl_CmdProc, set the Tcl_CmdProc to TclInvokeObjectCommand. + * Create the core commands. Do it here, rather than calling Tcl_CreateObjCommand, + * because it's faster (there's no need to check for a preexisting command + * by the same name). Set the Tcl_CmdProc to TclInvokeObjectCommand. */ for (cmdInfoPtr = builtInCmds; cmdInfoPtr->name != NULL; cmdInfoPtr++) { @@ -3140,7 +3135,7 @@ TclRenameCommand( /* * Make sure that the destination command does not already exist. The * rename operation is like creating a command, so we should automatically - * create the containing namespaces just like Tcl_CreateCommand would. + * create the containing namespaces just like Tcl_CreateObjCommand would. */ TclGetNamespaceForQualName(interp, newName, NULL, @@ -3445,7 +3440,7 @@ Tcl_GetCommandInfoFromToken( * * Tcl_GetCommandName -- * - * Given a token returned by Tcl_CreateCommand, this function returns the + * Given a token returned by Tcl_CreateObjCommand, this function returns the * current name of the command (which may have changed due to renaming). * * Results: @@ -3461,7 +3456,7 @@ const char * Tcl_GetCommandName( TCL_UNUSED(Tcl_Interp *), Tcl_Command command) /* Token for command returned by a previous - * call to Tcl_CreateCommand. The command must + * call to Tcl_CreateObjCommand. The command must * not have been deleted. */ { Command *cmdPtr = (Command *) command; @@ -3484,7 +3479,7 @@ Tcl_GetCommandName( * * Tcl_GetCommandFullName -- * - * Given a token returned by, e.g., Tcl_CreateCommand or Tcl_FindCommand, + * Given a token returned by, e.g., Tcl_CreateObjCommand or Tcl_FindCommand, * this function appends to an object the command's full name, qualified * by a sequence of parent namespace names. The command's fully-qualified * name may have changed due to renaming. @@ -3503,7 +3498,7 @@ void Tcl_GetCommandFullName( Tcl_Interp *interp, /* Interpreter containing the command. */ Tcl_Command command, /* Token for command returned by a previous - * call to Tcl_CreateCommand. The command must + * call to Tcl_CreateObjCommand. The command must * not have been deleted. */ Tcl_Obj *objPtr) /* Points to the object onto which the * command's full name is appended. */ diff --git a/generic/tclNamesp.c b/generic/tclNamesp.c index 24d9646..7a32fd9 100644 --- a/generic/tclNamesp.c +++ b/generic/tclNamesp.c @@ -2156,7 +2156,7 @@ DeleteImportedCmd( * If "flags" contains TCL_CREATE_NS_IF_UNKNOWN, all namespace components * of the qualified name that cannot be found are automatically created * within their specified parent. This makes sure that functions like - * Tcl_CreateCommand always succeed. There is no alternate search path, + * Tcl_CreateObjCommand always succeed. There is no alternate search path, * so *altNsPtrPtr is set NULL. * * If "flags" contains TCL_FIND_ONLY_NS, the qualified name is treated as @@ -2353,7 +2353,7 @@ TclGetNamespaceForQualName( * Look up the namespace qualifier nsName in the current namespace * context. If it isn't found but TCL_CREATE_NS_IF_UNKNOWN is set, * create that qualifying namespace. This is needed for functions like - * Tcl_CreateCommand that cannot fail. + * Tcl_CreateObjCommand that cannot fail. */ if (nsPtr != NULL) { diff --git a/unix/tclAppInit.c b/unix/tclAppInit.c index b203487..04ae564 100644 --- a/unix/tclAppInit.c +++ b/unix/tclAppInit.c @@ -148,7 +148,7 @@ Tcl_AppInit( */ /* - * Call Tcl_CreateCommand for application-specific commands, if they + * Call Tcl_CreateObjCommand for application-specific commands, if they * weren't already created by the init procedures called above. */ diff --git a/win/tclAppInit.c b/win/tclAppInit.c index a3914f1..d1b38ee 100644 --- a/win/tclAppInit.c +++ b/win/tclAppInit.c @@ -204,7 +204,7 @@ Tcl_AppInit( */ /* - * Call Tcl_CreateCommand for application-specific commands, if they + * Call Tcl_CreateObjCommand for application-specific commands, if they * weren't already created by the init procedures called above. */ -- cgit v0.12 From a4ac20392acd864a5b3d95221edf1bafc6737b23 Mon Sep 17 00:00:00 2001 From: oehhar Date: Fri, 10 Nov 2023 10:54:39 +0000 Subject: Exec wordsmithing. Thanks, Sergey! --- doc/exec.n | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/doc/exec.n b/doc/exec.n index f4a1702..a0008ad 100644 --- a/doc/exec.n +++ b/doc/exec.n @@ -236,15 +236,15 @@ But it works properly with an executable (using CommandLineToArgv, etc). .PP \fBArgument quoting\fR .RS -Each argument of the \fBexec\fR command is mapped to an argument of the called -program by an adaptive quoting by adding quote characters (\fB"\fR) around the -arguments. -.PP -If invoking batch files or other specific programs, the Windows environment -does execute programs mentioned in the arguments or replace environment -variables, which may have side effects (vulnerabilities) or break any already -existing quoting (for example, if the environment variable contains a special -character like a \fB"\fR). +The arguments of the \fBexec\fR command are mapped to the arguments of the called +program. Additional quote characters (\fB"\fR) are automatically added around +arguments if expected. Special characters are escaped by inserting backslash +characters. +.PP +The MS-Windows environment does execute programs mentioned in the arguments and +called batch files (conspec) replace environment variables, which may have side +effects (vulnerabilities) or break any already existing quoting (for example, +if the environment variable contains a special character like a \fB"\fR). Examples are: .CS % exec my-echo.cmd {test&whoami} @@ -253,14 +253,14 @@ Examples are: % exec my-echo.cmd "ENV X:%X%" ENV X: CONTENT OF X .CE -In consequence, the following formatting is automatically performed on any +The following formatting is automatically performed on any argument item: .IP \(bu 3 Avoid subprogram execution: Any special character argument containing a special character (\fB&\fR, \fB|\fR, \fB^\fR, \fB<\fR, \fB>\fR, \fB!\fR, \fB(\fR, \fB)\fR, \fB(\fR, \fB%\fR) is automatically enclosed in quotes (\fB"\fR). Any data quote is escaped by -appropriate sequences. +insertion of backslash characters. .IP \(bu 3 Avoid environment variable replacement: Any appearence of environment variable reference (\fB%\fR) is individually quoted -- cgit v0.12 From aaf72b94fa284aeb2580f8c5c4cac49e5166cb82 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Fri, 10 Nov 2023 13:55:54 +0000 Subject: Fix typo's in tclGetDate.y --- generic/tclGetDate.y | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/generic/tclGetDate.y b/generic/tclGetDate.y index 412f03f..ac9bf1c 100644 --- a/generic/tclGetDate.y +++ b/generic/tclGetDate.y @@ -641,7 +641,7 @@ static const TABLE TimezoneTable[] = { { "ist", tZONE, -HOUR(11/2) }, /* Indian Standard */ { "zp6", tZONE, -HOUR( 6) }, /* USSR Zone 5 */ #if 0 - /* For completeness. NST is also Newfoundland Stanard, nad SST is + /* For completeness. NST is also Newfoundland Standard, and SST is * also Swedish Summer. */ { "nst", tZONE, -HOUR(13/2) }, /* North Sumatra */ { "sst", tZONE, -HOUR( 7) }, /* South Sumatra, USSR Zone 6 */ @@ -1099,7 +1099,7 @@ TclClockOldscanObjCmd( } Tcl_ListObjAppendElement(interp, result, resultElement); - TcNewObj(resultElement); + TclNewObj(resultElement); if (yyHaveDay && !yyHaveDate) { Tcl_ListObjAppendElement(interp, resultElement, Tcl_NewIntObj((int) yyDayOrdinal)); -- cgit v0.12 From 67b84b71c66b45cb139546c2a7e3aec964a5d4c6 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Fri, 10 Nov 2023 14:22:06 +0000 Subject: Fix [54a305cb88] (second part) by hand-editing tclDate.c. --- generic/tclDate.c | 6 ------ unix/Makefile.in | 2 ++ 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/generic/tclDate.c b/generic/tclDate.c index ebe499d..fa27475 100644 --- a/generic/tclDate.c +++ b/generic/tclDate.c @@ -64,7 +64,6 @@ #define yylex TclDatelex #define yyerror TclDateerror #define yydebug TclDatedebug -#define yynerrs TclDatenerrs /* Copy the first part of user declarations. */ @@ -1295,9 +1294,6 @@ static YYLTYPE yyloc_default ; YYLTYPE yylloc = yyloc_default; - /* Number of syntax errors so far. */ - int yynerrs; - int yystate; /* Number of tokens to shift before error messages enabled. */ int yyerrstatus; @@ -1361,7 +1357,6 @@ YYLTYPE yylloc = yyloc_default; yystate = 0; yyerrstatus = 0; - yynerrs = 0; yychar = YYEMPTY; /* Cause a token to be read. */ yylsp[0] = yylloc; goto yysetstate; @@ -2099,7 +2094,6 @@ yyerrlab: /* If not already recovering from an error, report this error. */ if (!yyerrstatus) { - ++yynerrs; #if ! YYERROR_VERBOSE yyerror (&yylloc, info, YY_("syntax error")); #else diff --git a/unix/Makefile.in b/unix/Makefile.in index 39965cf..9267ef7 100644 --- a/unix/Makefile.in +++ b/unix/Makefile.in @@ -1919,6 +1919,8 @@ dist-packages: configure-packages # the name of the .y file so that make doesn't try to automatically regenerate # the .c file. +# +# Remark: see [54a305cb88]. tclDate.c is manually edited, removing the unused "yynerrs" variable gendate: bison --output-file=$(GENERIC_DIR)/tclDate.c \ --no-lines \ -- cgit v0.12 From b62a3d44eaf5682d190fb17bc414e45ed3b11901 Mon Sep 17 00:00:00 2001 From: oehhar Date: Sun, 12 Nov 2023 18:55:01 +0000 Subject: Bug [c4eb46a1]: endless loop on gets, non blocking, profile strict, encoding error: remove non-blocking exit condition and add test case --- generic/tclIO.c | 13 +++++++++++-- tests/io.test | 25 ++++++++++++++++++++++--- 2 files changed, 33 insertions(+), 5 deletions(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index 6461909..c92fb64 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -4918,8 +4918,17 @@ Tcl_GetsObj( } goto gotEOL; } else if (gs.bytesWrote == 0 - && GotFlag(statePtr, CHANNEL_ENCODING_ERROR) - && !GotFlag(statePtr, CHANNEL_NONBLOCKING)) { + && GotFlag(statePtr, CHANNEL_ENCODING_ERROR)) { + /* Ticket c4eb46a1 Harald Oehlmann 2023-11-12 debugging session. + * In non blocking mode we loop indifenitly on a decoding error in + * this while-loop. + * Removed the following from the upper condition: + * "&& !GotFlag(statePtr, CHANNEL_NONBLOCKING)" + * In case of an encoding error with leading correct bytes, we pass here + * two times, as gs.bytesWrote is not 0 on the first pass. This feels + * once to much, as the data is anyway not used. + */ + /* Set eol to the position that caused the encoding error, and then * continue to gotEOL, which stores the data that was decoded * without error to objPtr. This allows the caller to do something diff --git a/tests/io.test b/tests/io.test index 9f731ad..a6683c8 100644 --- a/tests/io.test +++ b/tests/io.test @@ -9193,7 +9193,7 @@ test io-75.5 {invalid utf-8 encoding read is ignored (-profile tcl8)} -setup { removeFile io-75.5 } -result 4181 -test io-75.6 {invalid utf-8 encoding, gets is not ignored (-profile strict)} -setup { +test io-75.6 {invalid utf-8 encoding, blocking gets is not ignored (-profile strict)} -setup { set fn [makeFile {} io-75.6] set f [open $fn w+] fconfigure $f -encoding binary @@ -9211,6 +9211,25 @@ test io-75.6 {invalid utf-8 encoding, gets is not ignored (-profile strict)} -se } -match glob -returnCodes 1 -result {error reading "file*":\ invalid or incomplete multibyte or wide character} +# TCL ticket c4eb46a196: non blocking case had endless loop, so test it +test io-75.6.2 {invalid utf-8 encoding, non blocking gets is not ignored (-profile strict)} -setup { + set fn [makeFile {} io-75.6.2] + set f [open $fn w+] + fconfigure $f -encoding binary + # \x81 is an incomplete byte sequence in utf-8 + puts -nonewline $f A\x81 + flush $f + seek $f 0 + fconfigure $f -encoding utf-8 -buffering none -eofchar {} \ + -translation lf -profile strict -blocking 0 +} -body { + gets $f +} -cleanup { + close $f + removeFile io-75.6.2 +} -match glob -returnCodes 1 -result {error reading "file*":\ + invalid or incomplete multibyte or wide character} + test io-75.7 { invalid utf-8 encoding read is not ignored (-profile strict) } -setup { @@ -9232,7 +9251,7 @@ test io-75.7 { } -match glob -result {1 {error reading "file*":\ invalid or incomplete multibyte or wide character}} -test io-75.8 {invalid utf-8 encoding eof handling (-profile strict)} -setup { +test io-75.8 {invalid utf-8 encoding eof first handling (-profile strict)} -setup { set fn [makeFile {} io-75.8] set f [open $fn w+] fconfigure $f -encoding binary @@ -9254,7 +9273,7 @@ test io-75.8 {invalid utf-8 encoding eof handling (-profile strict)} -setup { removeFile io-75.8 } -result {41 1 {}} -test io-75.8.eoflater {invalid utf-8 encoding eof handling (-profile strict)} -setup { +test io-75.8.eoflater {invalid utf-8 encoding eof after handling (-profile strict)} -setup { set fn [makeFile {} io-75.8] set f [open $fn w+] # This also configures the channel encoding profile as strict. -- cgit v0.12 From 44f9c28e418b785e842ac8b986daa9120d2a4b37 Mon Sep 17 00:00:00 2001 From: oehhar Date: Sun, 12 Nov 2023 19:32:07 +0000 Subject: bug [c4eb46a1]: fix was effective for test sequence "A\xC3B", but not for "A\x81". So add test io-75.6.1 with first sequence, io-75.6.2 is currently failing, as the gets does not return with an error. --- tests/io.test | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/tests/io.test b/tests/io.test index a6683c8..1078a50 100644 --- a/tests/io.test +++ b/tests/io.test @@ -9212,8 +9212,27 @@ test io-75.6 {invalid utf-8 encoding, blocking gets is not ignored (-profile str invalid or incomplete multibyte or wide character} # TCL ticket c4eb46a196: non blocking case had endless loop, so test it +# The first fix was successful with the test data A\xC3B, but not with A\x81. So, test both +test io-75.6.1 {invalid utf-8 encoding "A xc3 B", non blocking gets is not ignored (-profile strict)} -setup { + set fn [makeFile {} io-75.6.1] + set f [open $fn w+] + fconfigure $f -encoding binary + # utf-8: \xC3 requires a 2nd byte > x80, but Date: Mon, 13 Nov 2023 12:36:34 +0000 Subject: Simplify TIP #656: "A revised proposal for encodings". Make TCL_ENCODING_PROFILE_??? values the same as in Tcl 9.0 after TIP #657: Make "-profile strict" the default in Tcl 9.0 --- generic/tcl.h | 5 ++-- generic/tclCmdAH.c | 2 +- generic/tclEncoding.c | 67 +++------------------------------------------------ generic/tclInt.h | 13 +++++----- generic/tclTest.c | 2 +- generic/tclZipfs.c | 2 +- 6 files changed, 15 insertions(+), 76 deletions(-) diff --git a/generic/tcl.h b/generic/tcl.h index 2f1f793..5769cbd 100644 --- a/generic/tcl.h +++ b/generic/tcl.h @@ -2180,10 +2180,9 @@ typedef struct Tcl_EncodingType { * changes, ensure ENCODING_PROFILE_* macros in tclInt.h are modified if * necessary. */ +#define TCL_ENCODING_PROFILE_STRICT TCL_ENCODING_STOPONERROR #define TCL_ENCODING_PROFILE_TCL8 0x01000000 -#define TCL_ENCODING_PROFILE_STRICT 0x02000000 -#define TCL_ENCODING_PROFILE_REPLACE 0x03000000 -#define TCL_ENCODING_PROFILE_DEFAULT TCL_ENCODING_PROFILE_TCL8 +#define TCL_ENCODING_PROFILE_REPLACE 0x02000000 /* * The following definitions are the error codes returned by the conversion diff --git a/generic/tclCmdAH.c b/generic/tclCmdAH.c index c4b210c..e7e929f 100644 --- a/generic/tclCmdAH.c +++ b/generic/tclCmdAH.c @@ -673,7 +673,7 @@ EncodingConvertfromObjCmd( /* * Convert the string into a byte array in 'ds'. */ -#if !defined(TCL_NO_DEPRECATED) && (TCL_MAJOR_VERSION < 9) +#if !defined(TCL_NO_DEPRECATED) if (ENCODING_PROFILE_GET(flags) == TCL_ENCODING_PROFILE_TCL8) { /* Permits high bits to be non-0 in byte array (Tcl 8 style) */ bytesPtr = (char *) Tcl_GetByteArrayFromObj(data, &length); diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index e461db2..262dd01 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -192,7 +192,7 @@ Tcl_Encoding tclUtf8Encoding = NULL; * Names of encoding profiles and corresponding integer values. * Keep alphabetical order for error messages. */ -static struct TclEncodingProfiles { +static const struct TclEncodingProfiles { const char *name; int value; } encodingProfiles[] = { @@ -201,10 +201,10 @@ static struct TclEncodingProfiles { {"tcl8", TCL_ENCODING_PROFILE_TCL8}, }; #define PROFILE_STRICT(flags_) \ - (ENCODING_PROFILE_GET(flags_) == TCL_ENCODING_PROFILE_STRICT) + ((flags_) & TCL_ENCODING_PROFILE_STRICT) #define PROFILE_REPLACE(flags_) \ - (ENCODING_PROFILE_GET(flags_) == TCL_ENCODING_PROFILE_REPLACE) + ((ENCODING_PROFILE_GET(flags_) == TCL_ENCODING_PROFILE_REPLACE) && !PROFILE_STRICT(flags_)) #define UNICODE_REPLACE_CHAR ((Tcl_UniChar)0xFFFD) #define SURROGATE(c_) (((c_) & ~0x7FF) == 0xD800) @@ -1229,10 +1229,6 @@ Tcl_ExternalToUtfDString( * Possible flags values: * target encoding. It should be composed by OR-ing the following: * - *At most one* of TCL_ENCODING_PROFILE{DEFAULT,TCL8,STRICT} - * - TCL_ENCODING_STOPONERROR: Backward compatibility. Sets the profile - * to TCL_ENCODING_PROFILE_STRICT overriding any specified profile flags - * Any other flag bits will cause an error to be returned (for future - * compatibility) * * Results: * The return value is one of @@ -1535,8 +1531,6 @@ Tcl_UtfToExternalDString( * the source buffer are invalid or cannot be represented in the * target encoding. It should be composed by OR-ing the following: * - *At most one* of TCL_ENCODING_PROFILE_* - * - TCL_ENCODING_STOPONERROR: Backward compatibility. Sets the profile - * to TCL_ENCODING_PROFILE_STRICT overriding any specified profile flags * * Results: * The return value is one of @@ -2459,7 +2453,6 @@ BinaryProc( if (dstLen < 0) { dstLen = 0; } - flags = TclEncodingSetProfileFlags(flags); if ((flags & TCL_ENCODING_CHAR_LIMIT) && srcLen > *dstCharsPtr) { srcLen = *dstCharsPtr; } @@ -2527,7 +2520,6 @@ UtfToUtfProc( srcStart = src; srcEnd = src + srcLen; srcClose = srcEnd; - flags = TclEncodingSetProfileFlags(flags); if ((flags & TCL_ENCODING_END) == 0) { srcClose -= 6; } @@ -2743,7 +2735,6 @@ Utf32ToUtfProc( int result, numChars, charLimit = INT_MAX; int ch = 0, bytesLeft = srcLen % 4; - flags = TclEncodingSetProfileFlags(flags); flags |= PTR2INT(clientData); if (flags & TCL_ENCODING_CHAR_LIMIT) { charLimit = *dstCharsPtr; @@ -2900,7 +2891,6 @@ UtfToUtf32Proc( srcStart = src; srcEnd = src + srcLen; srcClose = srcEnd; - flags = TclEncodingSetProfileFlags(flags); if ((flags & TCL_ENCODING_END) == 0) { srcClose -= TCL_UTF_MAX; } @@ -2998,7 +2988,6 @@ Utf16ToUtfProc( int result, numChars, charLimit = INT_MAX; unsigned short ch = 0; - flags = TclEncodingSetProfileFlags(flags); flags |= PTR2INT(clientData); if (flags & TCL_ENCODING_CHAR_LIMIT) { charLimit = *dstCharsPtr; @@ -3159,7 +3148,6 @@ UtfToUtf16Proc( srcStart = src; srcEnd = src + srcLen; srcClose = srcEnd; - flags = TclEncodingSetProfileFlags(flags); if ((flags & TCL_ENCODING_END) == 0) { srcClose -= TCL_UTF_MAX; } @@ -3265,7 +3253,6 @@ UtfToUcs2Proc( int result, numChars, len; Tcl_UniChar ch = 0; - flags = TclEncodingSetProfileFlags(flags); flags |= PTR2INT(clientData); srcStart = src; srcEnd = src + srcLen; @@ -3375,7 +3362,6 @@ TableToUtfProc( const unsigned short *pageZero; TableEncodingData *dataPtr = (TableEncodingData *)clientData; - flags = TclEncodingSetProfileFlags(flags); if (flags & TCL_ENCODING_CHAR_LIMIT) { charLimit = *dstCharsPtr; } @@ -3508,7 +3494,6 @@ TableFromUtfProc( srcStart = src; srcEnd = src + srcLen; srcClose = srcEnd; - flags = TclEncodingSetProfileFlags(flags); if ((flags & TCL_ENCODING_END) == 0) { srcClose -= TCL_UTF_MAX; } @@ -3609,7 +3594,6 @@ Iso88591ToUtfProc( const char *dstEnd, *dstStart; int result, numChars, charLimit = INT_MAX; - flags = TclEncodingSetProfileFlags(flags); if (flags & TCL_ENCODING_CHAR_LIMIT) { charLimit = *dstCharsPtr; } @@ -3694,7 +3678,6 @@ Iso88591FromUtfProc( srcStart = src; srcEnd = src + srcLen; srcClose = srcEnd; - flags = TclEncodingSetProfileFlags(flags); if ((flags & TCL_ENCODING_END) == 0) { srcClose -= TCL_UTF_MAX; } @@ -3834,7 +3817,6 @@ EscapeToUtfProc( int state, result, numChars, charLimit = INT_MAX; const char *dstStart, *dstEnd; - flags = TclEncodingSetProfileFlags(flags); if (flags & TCL_ENCODING_CHAR_LIMIT) { charLimit = *dstCharsPtr; } @@ -4056,7 +4038,6 @@ EscapeFromUtfProc( srcStart = src; srcEnd = src + srcLen; srcClose = srcEnd; - flags = TclEncodingSetProfileFlags(flags); if ((flags & TCL_ENCODING_END) == 0) { srcClose -= TCL_UTF_MAX; } @@ -4505,48 +4486,6 @@ TclEncodingProfileIdToName( /* *------------------------------------------------------------------------ * - * TclEncodingSetProfileFlags -- - * - * Maps the flags supported in the encoding C API's to internal flags. - * - * For backward compatibility reasons, TCL_ENCODING_STOPONERROR is - * is mapped to the TCL_ENCODING_PROFILE_STRICT overwriting any profile - * specified. - * - * If no profile or an invalid profile is specified, it is set to - * the default. - * - * Results: - * Internal encoding flag mask. - * - * Side effects: - * None. - * - *------------------------------------------------------------------------ - */ -int TclEncodingSetProfileFlags(int flags) -{ - if (flags & TCL_ENCODING_STOPONERROR) { - ENCODING_PROFILE_SET(flags, TCL_ENCODING_PROFILE_STRICT); - } else { - int profile = ENCODING_PROFILE_GET(flags); - switch (profile) { - case TCL_ENCODING_PROFILE_TCL8: - case TCL_ENCODING_PROFILE_STRICT: - case TCL_ENCODING_PROFILE_REPLACE: - break; - case 0: /* Unspecified by caller */ - default: - ENCODING_PROFILE_SET(flags, TCL_ENCODING_PROFILE_TCL8); - break; - } - } - return flags; -} - -/* - *------------------------------------------------------------------------ - * * TclGetEncodingProfiles -- * * Get the list of supported encoding profiles. diff --git a/generic/tclInt.h b/generic/tclInt.h index 0b46184..3d8a702 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -2886,11 +2886,13 @@ typedef struct ProcessGlobalValue { */ #define ENCODING_PROFILE_MASK 0xFF000000 -#define ENCODING_PROFILE_GET(flags_) ((flags_) & ENCODING_PROFILE_MASK) -#define ENCODING_PROFILE_SET(flags_, profile_) \ - do { \ - (flags_) &= ~ENCODING_PROFILE_MASK; \ - (flags_) |= profile_; \ +#define ENCODING_PROFILE_GET(flags_) (((flags_) & TCL_ENCODING_PROFILE_STRICT) ? \ + TCL_ENCODING_PROFILE_STRICT : (((flags_) & ENCODING_PROFILE_MASK) ? \ + ((flags_) & ENCODING_PROFILE_MASK) : TCL_ENCODING_PROFILE_TCL8)) +#define ENCODING_PROFILE_SET(flags_, profile_) \ + do { \ + (flags_) &= ~(ENCODING_PROFILE_MASK|TCL_ENCODING_PROFILE_STRICT); \ + (flags_) |= (profile_) & (ENCODING_PROFILE_MASK|TCL_ENCODING_PROFILE_STRICT); \ } while (0) /* @@ -2916,7 +2918,6 @@ TclEncodingProfileNameToId(Tcl_Interp *interp, int *profilePtr); MODULE_SCOPE const char *TclEncodingProfileIdToName(Tcl_Interp *interp, int profileId); -MODULE_SCOPE int TclEncodingSetProfileFlags(int flags); MODULE_SCOPE void TclGetEncodingProfiles(Tcl_Interp *interp); /* diff --git a/generic/tclTest.c b/generic/tclTest.c index 6a90b67..0decc21 100644 --- a/generic/tclTest.c +++ b/generic/tclTest.c @@ -2092,7 +2092,7 @@ static int UtfExtWrapper( } flagMap[] = { {"start", TCL_ENCODING_START}, {"end", TCL_ENCODING_END}, - {"stoponerror", TCL_ENCODING_STOPONERROR}, + {"stoponerror", TCL_ENCODING_PROFILE_STRICT}, {"noterminate", TCL_ENCODING_NO_TERMINATE}, {"charlimit", TCL_ENCODING_CHAR_LIMIT}, {"profiletcl8", TCL_ENCODING_PROFILE_TCL8}, diff --git a/generic/tclZipfs.c b/generic/tclZipfs.c index 842d51a..5df300a 100644 --- a/generic/tclZipfs.c +++ b/generic/tclZipfs.c @@ -906,7 +906,7 @@ DecodeZipEntryText( dst = Tcl_DStringValue(dstPtr); dstLen = dstPtr->spaceAvl - 1; flags = TCL_ENCODING_START | TCL_ENCODING_END | - TCL_ENCODING_STOPONERROR; /* Special flag! */ + TCL_ENCODING_PROFILE_STRICT; /* Special flag! */ while (1) { int srcRead, dstWrote; -- cgit v0.12 From f2f65837424c0c2203228c46a3274edff4eb9265 Mon Sep 17 00:00:00 2001 From: oehhar Date: Mon, 13 Nov 2023 12:48:06 +0000 Subject: Bug [c4eb46a1]: non-blocking gets fires the error on 2nd call when sequence is incomplete. Added some test cases. --- generic/tclIO.c | 6 ++++++ tests/io.test | 56 ++++++++++++++++++++++++++++++++++++++++++++++++-------- 2 files changed, 54 insertions(+), 8 deletions(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index c92fb64..bc1b1c6 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -4749,6 +4749,12 @@ Tcl_GetsObj( ResetFlag(statePtr, CHANNEL_BLOCKED); while (1) { if (dst >= dstEnd) { + /* + * In case of encoding errors, state gets flag + * CHANNEL_ENCODING_ERROR set in the call below. First, the + * EOF/EOL condition is checked, as we may have valid data with + * EOF/EOL before the encoding error. + */ if (FilterInputBytes(chanPtr, &gs) != 0) { goto restore; } diff --git a/tests/io.test b/tests/io.test index 1078a50..7e62e6b 100644 --- a/tests/io.test +++ b/tests/io.test @@ -9193,7 +9193,7 @@ test io-75.5 {invalid utf-8 encoding read is ignored (-profile tcl8)} -setup { removeFile io-75.5 } -result 4181 -test io-75.6 {invalid utf-8 encoding, blocking gets is not ignored (-profile strict)} -setup { +test io-75.6 {incomplete utf-8 encoding, blocking gets is not ignored (-profile strict)} -setup { set fn [makeFile {} io-75.6] set f [open $fn w+] fconfigure $f -encoding binary @@ -9211,9 +9211,7 @@ test io-75.6 {invalid utf-8 encoding, blocking gets is not ignored (-profile str } -match glob -returnCodes 1 -result {error reading "file*":\ invalid or incomplete multibyte or wide character} -# TCL ticket c4eb46a196: non blocking case had endless loop, so test it -# The first fix was successful with the test data A\xC3B, but not with A\x81. So, test both -test io-75.6.1 {invalid utf-8 encoding "A xc3 B", non blocking gets is not ignored (-profile strict)} -setup { +test io-75.6.1 {invalid utf-8 encoding, blocking gets is not ignored (-profile strict)} -setup { set fn [makeFile {} io-75.6.1] set f [open $fn w+] fconfigure $f -encoding binary @@ -9222,7 +9220,7 @@ test io-75.6.1 {invalid utf-8 encoding "A xc3 B", non blocking gets is not ignor flush $f seek $f 0 fconfigure $f -encoding utf-8 -buffering none -eofchar {} \ - -translation lf -profile strict -blocking 0 + -translation lf -profile strict } -body { gets $f } -cleanup { @@ -9231,8 +9229,48 @@ test io-75.6.1 {invalid utf-8 encoding "A xc3 B", non blocking gets is not ignor } -match glob -returnCodes 1 -result {error reading "file*":\ invalid or incomplete multibyte or wide character} -test io-75.6.2 {invalid utf-8 encoding, non blocking gets is not ignored (-profile strict)} -setup { - set fn [makeFile {} io-75.6.1] +test io-75.6.2 {invalid utf-8 encoding, blocking gets is not ignored (-profile strict), recover functionality} -setup { + set fn [makeFile {} io-75.6.2] + set f [open $fn w+] + fconfigure $f -encoding binary + # utf-8: \xC3 requires a 2nd byte > x80, but x80, but Date: Mon, 13 Nov 2023 13:46:31 +0000 Subject: Fix for TIP #641: If sizeof(*(boolPtr)) > sizeof(int), generate a compiler-error. Requested by @pointsman --- generic/tclDecls.h | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/generic/tclDecls.h b/generic/tclDecls.h index 0fe582e..5768233 100644 --- a/generic/tclDecls.h +++ b/generic/tclDecls.h @@ -4331,23 +4331,29 @@ extern const TclStubs *tclStubsPtr; #undef Tcl_GetIndexFromObjStruct #undef Tcl_GetBooleanFromObj #undef Tcl_GetBoolean +#ifdef __GNUC__ + /* If this gives: "error: size of array ‘_boolVar’ is negative", it means that sizeof(*boolPtr)>sizeof(int), which is not allowed */ +# define TCLBOOLWARNING(boolPtr) ({__attribute__((unused)) char _bool_Var[sizeof(*(boolPtr)) > sizeof(int) ? -1 : 1];}), +#else +# define TCLBOOLWARNING(boolPtr) +#endif #if defined(USE_TCL_STUBS) #define Tcl_GetIndexFromObjStruct(interp, objPtr, tablePtr, offset, msg, flags, indexPtr) \ (tclStubsPtr->tcl_GetIndexFromObjStruct((interp), (objPtr), (tablePtr), (offset), (msg), (flags)|(int)(sizeof(*(indexPtr))<<1), (indexPtr))) #define Tcl_GetBooleanFromObj(interp, objPtr, boolPtr) \ - (sizeof(*(boolPtr)) == sizeof(int) ? tclStubsPtr->tcl_GetBooleanFromObj(interp, objPtr, (int *)(boolPtr)) : \ + (sizeof(*(boolPtr)) >= sizeof(int) ? (TCLBOOLWARNING(boolPtr)tclStubsPtr->tcl_GetBooleanFromObj(interp, objPtr, (int *)(boolPtr))) : \ Tcl_GetBoolFromObj(interp, objPtr, (TCL_NULL_OK-2)&(int)sizeof((*(boolPtr))), (char *)(boolPtr))) #define Tcl_GetBoolean(interp, src, boolPtr) \ - (sizeof(*(boolPtr)) == sizeof(int) ? tclStubsPtr->tcl_GetBoolean(interp, src, (int *)(boolPtr)) : \ + (sizeof(*(boolPtr)) >= sizeof(int) ? (TCLBOOLWARNING(boolPtr)tclStubsPtr->tcl_GetBoolean(interp, src, (int *)(boolPtr))) : \ Tcl_GetBool(interp, src, (TCL_NULL_OK-2)&(int)sizeof((*(boolPtr))), (char *)(boolPtr))) #else #define Tcl_GetIndexFromObjStruct(interp, objPtr, tablePtr, offset, msg, flags, indexPtr) \ ((Tcl_GetIndexFromObjStruct)((interp), (objPtr), (tablePtr), (offset), (msg), (flags)|(int)(sizeof(*(indexPtr))<<1), (indexPtr))) #define Tcl_GetBooleanFromObj(interp, objPtr, boolPtr) \ - (sizeof(*(boolPtr)) == sizeof(int) ? Tcl_GetBooleanFromObj(interp, objPtr, (int *)(boolPtr)) : \ + (sizeof(*(boolPtr)) >= sizeof(int) ? (TCLBOOLWARNING(boolPtr)Tcl_GetBooleanFromObj(interp, objPtr, (int *)(boolPtr))) : \ Tcl_GetBoolFromObj(interp, objPtr, (TCL_NULL_OK-2)&(int)sizeof((*(boolPtr))), (char *)(boolPtr))) #define Tcl_GetBoolean(interp, src, boolPtr) \ - (sizeof(*(boolPtr)) == sizeof(int) ? Tcl_GetBoolean(interp, src, (int *)(boolPtr)) : \ + (sizeof(*(boolPtr)) >= sizeof(int) ? (TCLBOOLWARNING(boolPtr)Tcl_GetBoolean(interp, src, (int *)(boolPtr))) : \ Tcl_GetBool(interp, src, (TCL_NULL_OK-2)&(int)sizeof((*(boolPtr))), (char *)(boolPtr))) #endif -- cgit v0.12