summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorjan.nijtmans <nijtmans@users.sourceforge.net>2023-01-19 17:01:40 (GMT)
committerjan.nijtmans <nijtmans@users.sourceforge.net>2023-01-19 17:01:40 (GMT)
commit22657ad365387756101eef242c94c8989688955c (patch)
tree95c72304e293323e70edc353bd97a6d28a11a042
parent02838226febd8e93a6b8ba05bb7d45749d2b2ed4 (diff)
downloadtcl-22657ad365387756101eef242c94c8989688955c.zip
tcl-22657ad365387756101eef242c94c8989688955c.tar.gz
tcl-22657ad365387756101eef242c94c8989688955c.tar.bz2
New proposal: Allow "-strict" immediately before or after "-failindex var".
-rw-r--r--doc/encoding.n4
-rw-r--r--generic/tclCmdAH.c43
-rw-r--r--generic/tclEncoding.c7
-rw-r--r--generic/tclIO.h1
-rw-r--r--tests/cmdAH.test58
-rw-r--r--tests/encoding.test4
-rw-r--r--tests/safe.test8
7 files changed, 97 insertions, 28 deletions
diff --git a/doc/encoding.n b/doc/encoding.n
index 7eae61a..24ca1c7 100644
--- a/doc/encoding.n
+++ b/doc/encoding.n
@@ -48,7 +48,7 @@ in case of a conversion error, the position of the input byte causing the error
is returned in the given variable. The return value of the command are the
converted characters until the first error position.
In case of no error, the value \fI-1\fR is written to the variable. This option
-may not be used together with \fB-nocomplain\fR, and it already implies \fB-strict\fR.
+may not be used together with \fB-nocomplain\fR.
.PP
The option \fB-nocomplain\fR has no effect and is available for compatibility with
TCL 9. In TCL 9, the encoding command fails with an error on any encoding issue.
@@ -81,7 +81,7 @@ in case of a conversion error, the position of the input character causing the e
is returned in the given variable. The return value of the command are the
converted bytes until the first error position. No error condition is raised.
In case of no error, the value \fI-1\fR is written to the variable. This option
-may not be used together with \fB-nocomplain\fR, and it already implies \fB-strict\fR.
+may not be used together with \fB-nocomplain\fR.
.PP
The option \fB-nocomplain\fR has no effect and is available for compatibility with
TCL 9. In TCL 9, the encoding command fails with an error on any encoding issue.
diff --git a/generic/tclCmdAH.c b/generic/tclCmdAH.c
index 016bd02..72cc618 100644
--- a/generic/tclCmdAH.c
+++ b/generic/tclCmdAH.c
@@ -12,6 +12,7 @@
*/
#include "tclInt.h"
+#include "tclIO.h"
#ifdef _WIN32
# include "tclWinInt.h"
#endif
@@ -574,7 +575,7 @@ EncodingConvertfromObjCmd(
if (objc == 2) {
encoding = Tcl_GetEncoding(interp, NULL);
data = objv[1];
- } else if (objc > 2 && objc < 6) {
+ } else if (objc > 2 && objc < 7) {
int objcUnprocessed = objc;
data = objv[objc - 1];
bytesPtr = Tcl_GetString(objv[1]);
@@ -586,6 +587,16 @@ EncodingConvertfromObjCmd(
&& !strncmp(bytesPtr, "-strict", strlen(bytesPtr))) {
flags = TCL_ENCODING_STRICT;
objcUnprocessed--;
+ bytesPtr = Tcl_GetString(objv[2]);
+ if (bytesPtr[0] == '-' && bytesPtr[1] == 'f'
+ && !strncmp(bytesPtr, "-failindex", strlen(bytesPtr))) {
+ /* at least two additional arguments needed */
+ if (objc < 6) {
+ goto encConvFromError;
+ }
+ failVarObj = objv[3];
+ objcUnprocessed -= 2;
+ }
} else if (bytesPtr[0] == '-' && bytesPtr[1] == 'f'
&& !strncmp(bytesPtr, "-failindex", strlen(bytesPtr))) {
/* at least two additional arguments needed */
@@ -593,8 +604,14 @@ EncodingConvertfromObjCmd(
goto encConvFromError;
}
failVarObj = objv[2];
- flags = TCL_ENCODING_STRICT;
+ flags = ENCODING_FAILINDEX;
objcUnprocessed -= 2;
+ bytesPtr = Tcl_GetString(objv[3]);
+ if (bytesPtr[0] == '-' && bytesPtr[1] == 's'
+ && !strncmp(bytesPtr, "-strict", strlen(bytesPtr))) {
+ flags = TCL_ENCODING_STRICT;
+ objcUnprocessed --;
+ }
}
switch (objcUnprocessed) {
case 3:
@@ -610,7 +627,7 @@ EncodingConvertfromObjCmd(
}
} else {
encConvFromError:
- Tcl_WrongNumArgs(interp, 1, objv, "?-nocomplain|-strict|-failindex var? ?encoding? data");
+ Tcl_WrongNumArgs(interp, 1, objv, "?-nocomplain|?-strict? ?-failindex var?? ?encoding? data");
return TCL_ERROR;
}
@@ -725,6 +742,16 @@ EncodingConverttoObjCmd(
&& !strncmp(stringPtr, "-strict", strlen(stringPtr))) {
flags = TCL_ENCODING_STRICT;
objcUnprocessed--;
+ stringPtr = Tcl_GetString(objv[2]);
+ if (stringPtr[0] == '-' && stringPtr[1] == 'f'
+ && !strncmp(stringPtr, "-failindex", strlen(stringPtr))) {
+ /* at least two additional arguments needed */
+ if (objc < 6) {
+ goto encConvToError;
+ }
+ failVarObj = objv[3];
+ objcUnprocessed -= 2;
+ }
} else if (stringPtr[0] == '-' && stringPtr[1] == 'f'
&& !strncmp(stringPtr, "-failindex", strlen(stringPtr))) {
/* at least two additional arguments needed */
@@ -732,8 +759,14 @@ EncodingConverttoObjCmd(
goto encConvToError;
}
failVarObj = objv[2];
- flags = TCL_ENCODING_STRICT;
+ flags = TCL_ENCODING_STOPONERROR;
objcUnprocessed -= 2;
+ stringPtr = Tcl_GetString(objv[3]);
+ if (stringPtr[0] == '-' && stringPtr[1] == 's'
+ && !strncmp(stringPtr, "-strict", strlen(stringPtr))) {
+ flags = TCL_ENCODING_STRICT;
+ objcUnprocessed --;
+ }
}
switch (objcUnprocessed) {
case 3:
@@ -749,7 +782,7 @@ EncodingConverttoObjCmd(
}
} else {
encConvToError:
- Tcl_WrongNumArgs(interp, 1, objv, "?-nocomplain|-strict|-failindex var? ?encoding? data");
+ Tcl_WrongNumArgs(interp, 1, objv, "?-nocomplain|?-strict? ?-failindex var?? ?encoding? data");
return TCL_ERROR;
}
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c
index ca96057..2f7d803 100644
--- a/generic/tclEncoding.c
+++ b/generic/tclEncoding.c
@@ -10,6 +10,7 @@
*/
#include "tclInt.h"
+#include "tclIO.h"
typedef size_t (LengthProc)(const char *src);
@@ -2386,9 +2387,9 @@ UtfToUtfProc(
*dst++ = *src++;
} else if ((UCHAR(*src) == 0xC0) && (src + 1 < srcEnd)
- && (UCHAR(src[1]) == 0x80) && (!(flags & TCL_ENCODING_MODIFIED) || ((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT))) {
+ && (UCHAR(src[1]) == 0x80) && (!(flags & TCL_ENCODING_MODIFIED) || ((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT) || (flags & ENCODING_FAILINDEX))) {
/*
- * If in input mode, and -strict is specified: This is an error.
+ * If in input mode, and -strict or -failindex is specified: This is an error.
*/
if (flags & TCL_ENCODING_MODIFIED) {
result = TCL_CONVERT_SYNTAX;
@@ -2413,7 +2414,7 @@ UtfToUtfProc(
result = TCL_CONVERT_MULTIBYTE;
break;
}
- if (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT)) {
+ if (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT) || (flags & ENCODING_FAILINDEX)) {
result = TCL_CONVERT_SYNTAX;
break;
}
diff --git a/generic/tclIO.h b/generic/tclIO.h
index fbd01ee..a69e990 100644
--- a/generic/tclIO.h
+++ b/generic/tclIO.h
@@ -284,6 +284,7 @@ typedef struct ChannelState {
* usable, but it may not be closed
* again from within the close
* handler. */
+#define ENCODING_FAILINDEX (1<<20) /* Internal flag, fail on Invalid bytes only */
#define CHANNEL_CLOSEDWRITE (1<<21) /* Channel write side has been closed.
* No further Tcl-level write IO on
* the channel is allowed. */
diff --git a/tests/cmdAH.test b/tests/cmdAH.test
index 3533cb6..9d51951 100644
--- a/tests/cmdAH.test
+++ b/tests/cmdAH.test
@@ -179,7 +179,7 @@ test cmdAH-4.2 {Tcl_EncodingObjCmd} -returnCodes error -body {
} -result {unknown or ambiguous subcommand "foo": must be convertfrom, convertto, dirs, names, or system}
test cmdAH-4.3 {Tcl_EncodingObjCmd} -returnCodes error -body {
encoding convertto
-} -result {wrong # args: should be "encoding convertto ?-nocomplain|-strict|-failindex var? ?encoding? data"}
+} -result {wrong # args: should be "encoding convertto ?-nocomplain|?-strict? ?-failindex var?? ?encoding? data"}
test cmdAH-4.4 {Tcl_EncodingObjCmd} -returnCodes error -body {
encoding convertto foo bar
} -result {unknown encoding "foo"}
@@ -201,7 +201,7 @@ test cmdAH-4.6 {Tcl_EncodingObjCmd} -setup {
} -result 8C
test cmdAH-4.7 {Tcl_EncodingObjCmd} -returnCodes error -body {
encoding convertfrom
-} -result {wrong # args: should be "encoding convertfrom ?-nocomplain|-strict|-failindex var? ?encoding? data"}
+} -result {wrong # args: should be "encoding convertfrom ?-nocomplain|?-strict? ?-failindex var?? ?encoding? data"}
test cmdAH-4.8 {Tcl_EncodingObjCmd} -returnCodes error -body {
encoding convertfrom foo bar
} -result {unknown encoding "foo"}
@@ -238,10 +238,10 @@ test cmdAH-4.13 {Tcl_EncodingObjCmd} -setup {
test cmdAH-4.14.1 {Syntax error, -nocomplain and -failindex, no encoding} -body {
encoding convertfrom -nocomplain -failindex 2 ABC
-} -returnCodes 1 -result {wrong # args: should be "encoding convertfrom ?-nocomplain|-strict|-failindex var? ?encoding? data"}
+} -returnCodes 1 -result {wrong # args: should be "encoding convertfrom ?-nocomplain|?-strict? ?-failindex var?? ?encoding? data"}
test cmdAH-4.14.2 {Syntax error, -nocomplain and -failindex, no encoding} -body {
encoding convertto -nocomplain -failindex 2 ABC
-} -returnCodes 1 -result {wrong # args: should be "encoding convertto ?-nocomplain|-strict|-failindex var? ?encoding? data"}
+} -returnCodes 1 -result {wrong # args: should be "encoding convertto ?-nocomplain|?-strict? ?-failindex var?? ?encoding? data"}
test cmdAH-4.15.1 {Syntax error, -failindex and -nocomplain, no encoding} -body {
encoding convertfrom -failindex 2 -nocomplain ABC
} -returnCodes 1 -result {unknown encoding "-nocomplain"}
@@ -250,19 +250,19 @@ test cmdAH-4.15.2 {Syntax error, -failindex and -nocomplain, no encoding} -body
} -returnCodes 1 -result {unknown encoding "-nocomplain"}
test cmdAH-4.16.1 {Syntax error, -nocomplain and -failindex, encoding} -body {
encoding convertfrom -nocomplain -failindex 2 utf-8 ABC
-} -returnCodes 1 -result {wrong # args: should be "encoding convertfrom ?-nocomplain|-strict|-failindex var? ?encoding? data"}
+} -returnCodes 1 -result {wrong # args: should be "encoding convertfrom ?-nocomplain|?-strict? ?-failindex var?? ?encoding? data"}
test cmdAH-4.16.2 {Syntax error, -nocomplain and -failindex, encoding} -body {
encoding convertto -nocomplain -failindex 2 utf-8 ABC
-} -returnCodes 1 -result {wrong # args: should be "encoding convertto ?-nocomplain|-strict|-failindex var? ?encoding? data"}
+} -returnCodes 1 -result {wrong # args: should be "encoding convertto ?-nocomplain|?-strict? ?-failindex var?? ?encoding? data"}
test cmdAH-4.17.1 {Syntax error, -failindex and -nocomplain, encoding} -body {
encoding convertfrom -failindex 2 -nocomplain utf-8 ABC
-} -returnCodes 1 -result {wrong # args: should be "encoding convertfrom ?-nocomplain|-strict|-failindex var? ?encoding? data"}
+} -returnCodes 1 -result {wrong # args: should be "encoding convertfrom ?-nocomplain|?-strict? ?-failindex var?? ?encoding? data"}
test cmdAH-4.17.2 {Syntax error, -failindex and -nocomplain, encoding} -body {
encoding convertto -failindex 2 -nocomplain utf-8 ABC
-} -returnCodes 1 -result {wrong # args: should be "encoding convertto ?-nocomplain|-strict|-failindex var? ?encoding? data"}
+} -returnCodes 1 -result {wrong # args: should be "encoding convertto ?-nocomplain|?-strict? ?-failindex var?? ?encoding? data"}
test cmdAH-4.18.1 {Syntax error, -failindex with no var, no encoding} -body {
encoding convertfrom -failindex ABC
-} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertfrom ?-nocomplain|-strict|-failindex var? ?encoding? data"}
+} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertfrom ?-nocomplain|?-strict? ?-failindex var?? ?encoding? data"}
test cmdAH-4.18.2 {Syntax error, -failindex with no var, no encoding (byte compiled)} -setup {
proc encoding_test {} {
encoding convertfrom -failindex ABC
@@ -270,12 +270,12 @@ test cmdAH-4.18.2 {Syntax error, -failindex with no var, no encoding (byte compi
} -body {
# Compile and execute
encoding_test
-} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertfrom ?-nocomplain|-strict|-failindex var? ?encoding? data"} -cleanup {
+} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertfrom ?-nocomplain|?-strict? ?-failindex var?? ?encoding? data"} -cleanup {
rename encoding_test ""
}
test cmdAH-4.18.3 {Syntax error, -failindex with no var, no encoding} -body {
encoding convertto -failindex ABC
-} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertto ?-nocomplain|-strict|-failindex var? ?encoding? data"}
+} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertto ?-nocomplain|?-strict? ?-failindex var?? ?encoding? data"}
test cmdAH-4.18.4 {Syntax error, -failindex with no var, no encoding (byte compiled)} -setup {
proc encoding_test {} {
encoding convertto -failindex ABC
@@ -283,7 +283,7 @@ test cmdAH-4.18.4 {Syntax error, -failindex with no var, no encoding (byte compi
} -body {
# Compile and execute
encoding_test
-} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertto ?-nocomplain|-strict|-failindex var? ?encoding? data"} -cleanup {
+} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertto ?-nocomplain|?-strict? ?-failindex var?? ?encoding? data"} -cleanup {
rename encoding_test ""
}
test cmdAH-4.19.1 {convertrom -failindex with correct data} -body {
@@ -333,6 +333,40 @@ test cmdAH-4.20.2 {convertrom -failindex with incomplete utf8 (byte compiled)} -
} -returnCodes 0 -result {41 1} -cleanup {
rename encoding_test ""
}
+test cmdAH-4.20.3 {convertrom -failindex with incomplete utf8} -body {
+ set x [encoding convertfrom -strict -failindex i utf-8 A\xc3]
+ binary scan $x H* y
+ list $y $i
+} -returnCodes 0 -result {41 1}
+test cmdAH-4.20.4 {convertrom -failindex with incomplete utf8 (byte compiled)} -setup {
+ proc encoding_test {} {
+ set x [encoding convertfrom -strict -failindex i utf-8 A\xc3]
+ binary scan $x H* y
+ list $y $i
+ }
+} -body {
+ # Compile and execute
+ encoding_test
+} -returnCodes 0 -result {41 1} -cleanup {
+ rename encoding_test ""
+}
+test cmdAH-4.20.5 {convertrom -failindex with incomplete utf8} -body {
+ set x [encoding convertfrom -failindex i -strict utf-8 A\xc3]
+ binary scan $x H* y
+ list $y $i
+} -returnCodes 0 -result {41 1}
+test cmdAH-4.20.6 {convertrom -failindex with incomplete utf8 (byte compiled)} -setup {
+ proc encoding_test {} {
+ set x [encoding convertfrom -failindex i -strict utf-8 A\xc3]
+ binary scan $x H* y
+ list $y $i
+ }
+} -body {
+ # Compile and execute
+ encoding_test
+} -returnCodes 0 -result {41 1} -cleanup {
+ rename encoding_test ""
+}
test cmdAH-4.21.1 {convertto -failindex with wrong character} -body {
set x [encoding convertto -failindex i iso8859-1 A\u0141]
binary scan $x H* y
diff --git a/tests/encoding.test b/tests/encoding.test
index a1d129e..095672c 100644
--- a/tests/encoding.test
+++ b/tests/encoding.test
@@ -681,10 +681,10 @@ test encoding-24.21 {Parse with -nocomplain but without providing encoding} {
} 1
test encoding-24.22 {Syntax error, two encodings} -body {
encoding convertfrom iso8859-1 utf-8 "ZX\uD800"
-} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertfrom ?-nocomplain|-strict|-failindex var? ?encoding? data"}
+} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertfrom ?-nocomplain|?-strict? ?-failindex var?? ?encoding? data"}
test encoding-24.23 {Syntax error, two encodings} -body {
encoding convertto iso8859-1 utf-8 "ZX\uD800"
-} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertto ?-nocomplain|-strict|-failindex var? ?encoding? data"}
+} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertto ?-nocomplain|?-strict? ?-failindex var?? ?encoding? data"}
test encoding-24.24 {Parse invalid utf-8 with -strict} -body {
encoding convertfrom -strict utf-8 "\xC0\x80\x00\x00"
} -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xC0'}
diff --git a/tests/safe.test b/tests/safe.test
index f2c0862..be1ce57 100644
--- a/tests/safe.test
+++ b/tests/safe.test
@@ -1473,7 +1473,7 @@ test safe-11.7 {testing safe encoding} -setup {
interp eval $i encoding convertfrom
} -returnCodes error -cleanup {
safe::interpDelete $i
-} -result {wrong # args: should be "encoding convertfrom ?-nocomplain|-strict|-failindex var? ?encoding? data"}
+} -result {wrong # args: should be "encoding convertfrom ?-nocomplain|?-strict? ?-failindex var?? ?encoding? data"}
test safe-11.7.1 {testing safe encoding} -setup {
set i [safe::interpCreate]
} -body {
@@ -1482,7 +1482,7 @@ test safe-11.7.1 {testing safe encoding} -setup {
} -match glob -cleanup {
unset -nocomplain m o
safe::interpDelete $i
-} -result {wrong # args: should be "encoding convertfrom ?-nocomplain|-strict|-failindex var? ?encoding? data"
+} -result {wrong # args: should be "encoding convertfrom ?-nocomplain|?-strict? ?-failindex var?? ?encoding? data"
while executing
"encoding convertfrom"
invoked from within
@@ -1495,7 +1495,7 @@ test safe-11.8 {testing safe encoding} -setup {
interp eval $i encoding convertto
} -returnCodes error -cleanup {
safe::interpDelete $i
-} -result {wrong # args: should be "encoding convertto ?-nocomplain|-strict|-failindex var? ?encoding? data"}
+} -result {wrong # args: should be "encoding convertto ?-nocomplain|?-strict? ?-failindex var?? ?encoding? data"}
test safe-11.8.1 {testing safe encoding} -setup {
set i [safe::interpCreate]
} -body {
@@ -1504,7 +1504,7 @@ test safe-11.8.1 {testing safe encoding} -setup {
} -match glob -cleanup {
unset -nocomplain m o
safe::interpDelete $i
-} -result {wrong # args: should be "encoding convertto ?-nocomplain|-strict|-failindex var? ?encoding? data"
+} -result {wrong # args: should be "encoding convertto ?-nocomplain|?-strict? ?-failindex var?? ?encoding? data"
while executing
"encoding convertto"
invoked from within