diff options
author | jan.nijtmans <nijtmans@users.sourceforge.net> | 2022-09-12 20:41:29 (GMT) |
---|---|---|
committer | jan.nijtmans <nijtmans@users.sourceforge.net> | 2022-09-12 20:41:29 (GMT) |
commit | 393743bb7088f57b28cd5f98d2c9f70189807a2e (patch) | |
tree | aaedea2d95e3a091ee27030aa564047769784c1c | |
parent | c2626689e16b104637564825cd61b1b0ff14dfc2 (diff) | |
download | tcl-393743bb7088f57b28cd5f98d2c9f70189807a2e.zip tcl-393743bb7088f57b28cd5f98d2c9f70189807a2e.tar.gz tcl-393743bb7088f57b28cd5f98d2c9f70189807a2e.tar.bz2 |
Start TIP #346 implementation: For now only \xC0\x80
-rw-r--r-- | generic/tcl.h | 1 | ||||
-rw-r--r-- | generic/tclCmdAH.c | 22 | ||||
-rw-r--r-- | generic/tclEncoding.c | 8 | ||||
-rw-r--r-- | tests/cmdAH.test | 24 | ||||
-rw-r--r-- | tests/encoding.test | 4 | ||||
-rw-r--r-- | tests/safe.test | 8 |
6 files changed, 41 insertions, 26 deletions
diff --git a/generic/tcl.h b/generic/tcl.h index f17d43e..acff803 100644 --- a/generic/tcl.h +++ b/generic/tcl.h @@ -2118,6 +2118,7 @@ typedef struct Tcl_EncodingType { #define TCL_ENCODING_CHAR_LIMIT 0x10 #define TCL_ENCODING_MODIFIED 0x20 #define TCL_ENCODING_NOCOMPLAIN 0x40 +#define TCL_ENCODING_STRICT 0x44 /* * The following definitions are the error codes returned by the conversion diff --git a/generic/tclCmdAH.c b/generic/tclCmdAH.c index 28fc210..572a995 100644 --- a/generic/tclCmdAH.c +++ b/generic/tclCmdAH.c @@ -564,8 +564,10 @@ EncodingConvertfromObjCmd( * 2) encoding data -> objc = 3 * 3) -nocomplain data -> objc = 3 * 4) -nocomplain encoding data -> objc = 4 - * 5) -failindex val data -> objc = 4 - * 6) -failindex val encoding data -> objc = 5 + * 5) -strict data -> objc = 3 + * 6) -strict encoding data -> objc = 4 + * 7) -failindex val data -> objc = 4 + * 8) -failindex val encoding data -> objc = 5 */ if (objc == 2) { @@ -579,6 +581,10 @@ EncodingConvertfromObjCmd( && !strncmp(bytesPtr, "-nocomplain", strlen(bytesPtr))) { flags = TCL_ENCODING_NOCOMPLAIN; objcUnprocessed--; + } else if (bytesPtr[0] == '-' && bytesPtr[1] == 's' + && !strncmp(bytesPtr, "-strict", strlen(bytesPtr))) { + flags = TCL_ENCODING_STRICT; + objcUnprocessed--; } else if (bytesPtr[0] == '-' && bytesPtr[1] == 'f' && !strncmp(bytesPtr, "-failindex", strlen(bytesPtr))) { /* at least two additional arguments needed */ @@ -603,7 +609,7 @@ EncodingConvertfromObjCmd( } } else { encConvFromError: - Tcl_WrongNumArgs(interp, 1, objv, "?-nocomplain? ?-failindex var? ?encoding? data"); + Tcl_WrongNumArgs(interp, 1, objv, "?-nocomplain? ?-strict? ?-failindex var? ?encoding? data"); return TCL_ERROR; } @@ -621,7 +627,7 @@ EncodingConvertfromObjCmd( } result = Tcl_ExternalToUtfDStringEx(encoding, bytesPtr, length, flags, &ds); - if (!(flags & TCL_ENCODING_NOCOMPLAIN) && (result != TCL_INDEX_NONE)) { + if ((!(flags & TCL_ENCODING_NOCOMPLAIN) || ((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT)) && (result != TCL_INDEX_NONE)) { if (failVarObj != NULL) { if (Tcl_ObjSetVar2(interp, failVarObj, NULL, Tcl_NewWideIntObj(result), TCL_LEAVE_ERR_MSG) == NULL) { return TCL_ERROR; @@ -714,6 +720,10 @@ EncodingConverttoObjCmd( && !strncmp(stringPtr, "-nocomplain", strlen(stringPtr))) { flags = TCL_ENCODING_NOCOMPLAIN; objcUnprocessed--; + } else if (stringPtr[0] == '-' && stringPtr[1] == 's' + && !strncmp(stringPtr, "-strict", strlen(stringPtr))) { + flags = TCL_ENCODING_STRICT; + objcUnprocessed--; } else if (stringPtr[0] == '-' && stringPtr[1] == 'f' && !strncmp(stringPtr, "-failindex", strlen(stringPtr))) { /* at least two additional arguments needed */ @@ -738,7 +748,7 @@ EncodingConverttoObjCmd( } } else { encConvToError: - Tcl_WrongNumArgs(interp, 1, objv, "?-nocomplain? ?-failindex var? ?encoding? data"); + Tcl_WrongNumArgs(interp, 1, objv, "?-nocomplain? ?-strict? ?-failindex var? ?encoding? data"); return TCL_ERROR; } @@ -749,7 +759,7 @@ EncodingConverttoObjCmd( stringPtr = TclGetStringFromObj(data, &length); result = Tcl_UtfToExternalDStringEx(encoding, stringPtr, length, flags, &ds); - if (!(flags & TCL_ENCODING_NOCOMPLAIN) && (result != TCL_INDEX_NONE)) { + if ((!(flags & TCL_ENCODING_NOCOMPLAIN) || ((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT)) && (result != TCL_INDEX_NONE)) { if (failVarObj != NULL) { /* I hope, wide int will cover size_t data type */ if (Tcl_ObjSetVar2(interp, failVarObj, NULL, Tcl_NewWideIntObj(result), TCL_LEAVE_ERR_MSG) == NULL) { diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 0ce75b4..9c4b5ce 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -2288,7 +2288,7 @@ BinaryProc( */ #if TCL_MAJOR_VERSION > 8 || defined(TCL_NO_DEPRECATED) -# define STOPONERROR !(flags & TCL_ENCODING_NOCOMPLAIN) +# define STOPONERROR (!(flags & TCL_ENCODING_NOCOMPLAIN) || (flags & TCL_ENCODING_STOPONERROR)) #else # define STOPONERROR (flags & TCL_ENCODING_STOPONERROR) #endif @@ -2359,10 +2359,14 @@ UtfToUtfProc( *dst++ = *src++; } else if ((UCHAR(*src) == 0xC0) && (src + 1 < srcEnd) - && (UCHAR(src[1]) == 0x80) && !(flags & TCL_ENCODING_MODIFIED)) { + && (UCHAR(src[1]) == 0x80) && (!(flags & TCL_ENCODING_MODIFIED) || ((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT))) { /* * Convert 0xC080 to real nulls when we are in output mode. */ + if (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT)) { + result = TCL_CONVERT_UNKNOWN; + break; + } *dst++ = 0; src += 2; diff --git a/tests/cmdAH.test b/tests/cmdAH.test index ab1a8e6..64991af 100644 --- a/tests/cmdAH.test +++ b/tests/cmdAH.test @@ -178,7 +178,7 @@ test cmdAH-4.2 {Tcl_EncodingObjCmd} -returnCodes error -body { } -result {unknown or ambiguous subcommand "foo": must be convertfrom, convertto, dirs, names, or system} test cmdAH-4.3 {Tcl_EncodingObjCmd} -returnCodes error -body { encoding convertto -} -result {wrong # args: should be "encoding convertto ?-nocomplain? ?-failindex var? ?encoding? data"} +} -result {wrong # args: should be "encoding convertto ?-nocomplain? ?-strict? ?-failindex var? ?encoding? data"} test cmdAH-4.4 {Tcl_EncodingObjCmd} -returnCodes error -body { encoding convertto foo bar } -result {unknown encoding "foo"} @@ -200,7 +200,7 @@ test cmdAH-4.6 {Tcl_EncodingObjCmd} -setup { } -result 8C test cmdAH-4.7 {Tcl_EncodingObjCmd} -returnCodes error -body { encoding convertfrom -} -result {wrong # args: should be "encoding convertfrom ?-nocomplain? ?-failindex var? ?encoding? data"} +} -result {wrong # args: should be "encoding convertfrom ?-nocomplain? ?-strict? ?-failindex var? ?encoding? data"} test cmdAH-4.8 {Tcl_EncodingObjCmd} -returnCodes error -body { encoding convertfrom foo bar } -result {unknown encoding "foo"} @@ -237,10 +237,10 @@ test cmdAH-4.13 {Tcl_EncodingObjCmd} -setup { test cmdAH-4.14.1 {Syntax error, -nocomplain and -failindex, no encoding} -body { encoding convertfrom -nocomplain -failindex 2 ABC -} -returnCodes 1 -result {wrong # args: should be "encoding convertfrom ?-nocomplain? ?-failindex var? ?encoding? data"} +} -returnCodes 1 -result {wrong # args: should be "encoding convertfrom ?-nocomplain? ?-strict? ?-failindex var? ?encoding? data"} test cmdAH-4.14.2 {Syntax error, -nocomplain and -failindex, no encoding} -body { encoding convertto -nocomplain -failindex 2 ABC -} -returnCodes 1 -result {wrong # args: should be "encoding convertto ?-nocomplain? ?-failindex var? ?encoding? data"} +} -returnCodes 1 -result {wrong # args: should be "encoding convertto ?-nocomplain? ?-strict? ?-failindex var? ?encoding? data"} test cmdAH-4.15.1 {Syntax error, -failindex and -nocomplain, no encoding} -body { encoding convertfrom -failindex 2 -nocomplain ABC } -returnCodes 1 -result {unknown encoding "-nocomplain"} @@ -249,19 +249,19 @@ test cmdAH-4.15.2 {Syntax error, -failindex and -nocomplain, no encoding} -body } -returnCodes 1 -result {unknown encoding "-nocomplain"} test cmdAH-4.16.1 {Syntax error, -nocomplain and -failindex, encoding} -body { encoding convertfrom -nocomplain -failindex 2 utf-8 ABC -} -returnCodes 1 -result {wrong # args: should be "encoding convertfrom ?-nocomplain? ?-failindex var? ?encoding? data"} +} -returnCodes 1 -result {wrong # args: should be "encoding convertfrom ?-nocomplain? ?-strict? ?-failindex var? ?encoding? data"} test cmdAH-4.16.2 {Syntax error, -nocomplain and -failindex, encoding} -body { encoding convertto -nocomplain -failindex 2 utf-8 ABC -} -returnCodes 1 -result {wrong # args: should be "encoding convertto ?-nocomplain? ?-failindex var? ?encoding? data"} +} -returnCodes 1 -result {wrong # args: should be "encoding convertto ?-nocomplain? ?-strict? ?-failindex var? ?encoding? data"} test cmdAH-4.17.1 {Syntax error, -failindex and -nocomplain, encoding} -body { encoding convertfrom -failindex 2 -nocomplain utf-8 ABC -} -returnCodes 1 -result {wrong # args: should be "encoding convertfrom ?-nocomplain? ?-failindex var? ?encoding? data"} +} -returnCodes 1 -result {wrong # args: should be "encoding convertfrom ?-nocomplain? ?-strict? ?-failindex var? ?encoding? data"} test cmdAH-4.17.2 {Syntax error, -failindex and -nocomplain, encoding} -body { encoding convertto -failindex 2 -nocomplain utf-8 ABC -} -returnCodes 1 -result {wrong # args: should be "encoding convertto ?-nocomplain? ?-failindex var? ?encoding? data"} +} -returnCodes 1 -result {wrong # args: should be "encoding convertto ?-nocomplain? ?-strict? ?-failindex var? ?encoding? data"} test cmdAH-4.18.1 {Syntax error, -failindex with no var, no encoding} -body { encoding convertfrom -failindex ABC -} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertfrom ?-nocomplain? ?-failindex var? ?encoding? data"} +} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertfrom ?-nocomplain? ?-strict? ?-failindex var? ?encoding? data"} test cmdAH-4.18.2 {Syntax error, -failindex with no var, no encoding (byte compiled)} -setup { proc encoding_test {} { encoding convertfrom -failindex ABC @@ -269,12 +269,12 @@ test cmdAH-4.18.2 {Syntax error, -failindex with no var, no encoding (byte compi } -body { # Compile and execute encoding_test -} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertfrom ?-nocomplain? ?-failindex var? ?encoding? data"} -cleanup { +} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertfrom ?-nocomplain? ?-strict? ?-failindex var? ?encoding? data"} -cleanup { rename encoding_test "" } test cmdAH-4.18.3 {Syntax error, -failindex with no var, no encoding} -body { encoding convertto -failindex ABC -} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertto ?-nocomplain? ?-failindex var? ?encoding? data"} +} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertto ?-nocomplain? ?-strict? ?-failindex var? ?encoding? data"} test cmdAH-4.18.4 {Syntax error, -failindex with no var, no encoding (byte compiled)} -setup { proc encoding_test {} { encoding convertto -failindex ABC @@ -282,7 +282,7 @@ test cmdAH-4.18.4 {Syntax error, -failindex with no var, no encoding (byte compi } -body { # Compile and execute encoding_test -} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertto ?-nocomplain? ?-failindex var? ?encoding? data"} -cleanup { +} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertto ?-nocomplain? ?-strict? ?-failindex var? ?encoding? data"} -cleanup { rename encoding_test "" } test cmdAH-4.19.1 {convertrom -failindex with correct data} -body { diff --git a/tests/encoding.test b/tests/encoding.test index 6f11968..c8f409e 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -669,10 +669,10 @@ test encoding-24.21 {Parse with -nocomplain but without providing encoding} { } 1 test encoding-24.22 {Syntax error, two encodings} -body { encoding convertfrom iso8859-1 utf-8 "ZX\uD800" -} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertfrom ?-nocomplain? ?-failindex var? ?encoding? data"} +} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertfrom ?-nocomplain? ?-strict? ?-failindex var? ?encoding? data"} test encoding-24.23 {Syntax error, two encodings} -body { encoding convertto iso8859-1 utf-8 "ZX\uD800" -} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertto ?-nocomplain? ?-failindex var? ?encoding? data"} +} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertto ?-nocomplain? ?-strict? ?-failindex var? ?encoding? data"} file delete [file join [temporaryDirectory] iso2022.txt] diff --git a/tests/safe.test b/tests/safe.test index fc7c814..148215a 100644 --- a/tests/safe.test +++ b/tests/safe.test @@ -1269,7 +1269,7 @@ test safe-11.7 {testing safe encoding} -setup { interp eval $i encoding convertfrom } -returnCodes error -cleanup { safe::interpDelete $i -} -result {wrong # args: should be "encoding convertfrom ?-nocomplain? ?-failindex var? ?encoding? data"} +} -result {wrong # args: should be "encoding convertfrom ?-nocomplain? ?-strict? ?-failindex var? ?encoding? data"} test safe-11.7.1 {testing safe encoding} -setup { set i [safe::interpCreate] } -body { @@ -1278,7 +1278,7 @@ test safe-11.7.1 {testing safe encoding} -setup { } -match glob -cleanup { unset -nocomplain m o safe::interpDelete $i -} -result {wrong # args: should be "encoding convertfrom ?-nocomplain? ?-failindex var? ?encoding? data" +} -result {wrong # args: should be "encoding convertfrom ?-nocomplain? ?-strict? ?-failindex var? ?encoding? data" while executing "encoding convertfrom" invoked from within @@ -1291,7 +1291,7 @@ test safe-11.8 {testing safe encoding} -setup { interp eval $i encoding convertto } -returnCodes error -cleanup { safe::interpDelete $i -} -result {wrong # args: should be "encoding convertto ?-nocomplain? ?-failindex var? ?encoding? data"} +} -result {wrong # args: should be "encoding convertto ?-nocomplain? ?-strict? ?-failindex var? ?encoding? data"} test safe-11.8.1 {testing safe encoding} -setup { set i [safe::interpCreate] } -body { @@ -1300,7 +1300,7 @@ test safe-11.8.1 {testing safe encoding} -setup { } -match glob -cleanup { unset -nocomplain m o safe::interpDelete $i -} -result {wrong # args: should be "encoding convertto ?-nocomplain? ?-failindex var? ?encoding? data" +} -result {wrong # args: should be "encoding convertto ?-nocomplain? ?-strict? ?-failindex var? ?encoding? data" while executing "encoding convertto" invoked from within |