From 9fdb32b8e254da15698c28d65e281ee946a57eb5 Mon Sep 17 00:00:00 2001 From: oehhar Date: Mon, 14 Mar 2022 15:46:35 +0000 Subject: TIP607 encoding failindex: start implementation --- generic/tclCmdAH.c | 66 +++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 48 insertions(+), 18 deletions(-) diff --git a/generic/tclCmdAH.c b/generic/tclCmdAH.c index 60a2c42..96bac4e 100644 --- a/generic/tclCmdAH.c +++ b/generic/tclCmdAH.c @@ -556,28 +556,59 @@ EncodingConvertfromObjCmd( int flags = TCL_ENCODING_NOCOMPLAIN; #endif size_t result; + Tcl_Obj *failVarObj = NULL; + int i, encodingSeen = 0; + /* + * Decode parameters: + * Possible combinations: + * 1) data -> objc = 2 + * 2) encoding data -> objc = 3 + * 3) -nocomplain data -> objc = 3 (8.7) + * 4) -nocomplain encoding data -> objc = 4 (8.7) + * 5) -failindex val data -> objc = 4 + * 6) -failindex val encoding data -> objc = 5 + * 7a) -nocomplain -failindex val data -> objc = 5 + * 7b) -failindex val -nocomplain data -> objc = 5 + * 8a) -nocomplain -failindex val encoding data -> objc = 6 + * 8b) -failindex val -nocomplain encoding data -> objc = 6 + */ - if (objc == 2) { - encoding = Tcl_GetEncoding(interp, NULL); - data = objv[1]; - } else if ((unsigned)(objc - 2) < 3) { + if (objc > 1 && objc < 7) { + int noComplaintSeen = 0; + int encodingSeen = 0; data = objv[objc - 1]; - bytesPtr = Tcl_GetString(objv[1]); - if (bytesPtr[0] == '-' && bytesPtr[1] == 'n' - && !strncmp(bytesPtr, "-nocomplain", strlen(bytesPtr))) { - flags = TCL_ENCODING_NOCOMPLAIN; - } else if (objc < 4) { - if (Tcl_GetEncodingFromObj(interp, objv[objc - 2], &encoding) != TCL_OK) { - return TCL_ERROR; + for(i = 1; i < objc-1 ; i++ ) { + bytesPtr = Tcl_GetString(objv[i]); + if (bytesPtr[0] == '-' && bytesPtr[1] == 'n' + && !strncmp(bytesPtr, "-nocomplain", strlen(bytesPtr))) { + if (noComplaintSeen) { + goto encConvFromError; + } + flags = TCL_ENCODING_NOCOMPLAIN; + noComplaintSeen = 1; + } else if (bytesPtr[0] == '-' && bytesPtr[1] == 
'f' + && !strncmp(bytesPtr, "-failindex", strlen(bytesPtr))) { + /* at least two additional arguments needed */ + if (objc < i + 3) { + goto encConvFromError; + } + if (failVarObj != NULL) { + goto encConvFromError; + } + i++; + failVarObj = objv[i]; + flags = TCL_ENCODING_NOCOMPLAIN; + } else if (i == objc - 2) { + if (Tcl_GetEncodingFromObj(interp, objv[i], &encoding) != TCL_OK) { + return TCL_ERROR; + } + encodingSeen = 1; + } else { + goto encConvFromError; } - goto encConvFromOK; - } else { - goto encConvFromError; } - if (objc < 4) { + if (!encodingSeen) { encoding = Tcl_GetEncoding(interp, NULL); - } else if (Tcl_GetEncodingFromObj(interp, objv[objc - 2], &encoding) != TCL_OK) { - return TCL_ERROR; } } else { encConvFromError: @@ -585,7 +616,6 @@ EncodingConvertfromObjCmd( return TCL_ERROR; } -encConvFromOK: /* * Convert the string into a byte array in 'ds' */ -- cgit v0.12 From e1f11871dd6bf0d90bef16897153fd0c00c9d136 Mon Sep 17 00:00:00 2001 From: oehhar Date: Mon, 14 Mar 2022 16:10:27 +0000 Subject: TIP607 encoding failindex: options -failindex and -nocomplain may not both be specified --- generic/tclCmdAH.c | 67 +++++++++++++++++++++++------------------------------- 1 file changed, 29 insertions(+), 38 deletions(-) diff --git a/generic/tclCmdAH.c b/generic/tclCmdAH.c index 96bac4e..6c15630 100644 --- a/generic/tclCmdAH.c +++ b/generic/tclCmdAH.c @@ -557,7 +557,6 @@ EncodingConvertfromObjCmd( #endif size_t result; Tcl_Obj *failVarObj = NULL; - int i, encodingSeen = 0; /* * Decode parameters: * Possible combinations: @@ -567,52 +566,44 @@ EncodingConvertfromObjCmd( * 4) -nocomplain encoding data -> objc = 4 (8.7) * 5) -failindex val data -> objc = 4 * 6) -failindex val encoding data -> objc = 5 - * 7a) -nocomplain -failindex val data -> objc = 5 - * 7b) -failindex val -nocomplain data -> objc = 5 - * 8a) -nocomplain -failindex val encoding data -> objc = 6 - * 8b) -failindex val -nocomplain encoding data -> objc = 6 */ - if (objc > 1 && objc < 7) { - int 
noComplaintSeen = 0; - int encodingSeen = 0; + if (objc == 2) { + encoding = Tcl_GetEncoding(interp, NULL); + data = objv[1]; + } else if ((unsigned)(objc - 2) < 4) { + int objcUnprocessed = objc; data = objv[objc - 1]; - for(i = 1; i < objc-1 ; i++ ) { - bytesPtr = Tcl_GetString(objv[i]); - if (bytesPtr[0] == '-' && bytesPtr[1] == 'n' - && !strncmp(bytesPtr, "-nocomplain", strlen(bytesPtr))) { - if (noComplaintSeen) { - goto encConvFromError; - } - flags = TCL_ENCODING_NOCOMPLAIN; - noComplaintSeen = 1; - } else if (bytesPtr[0] == '-' && bytesPtr[1] == 'f' - && !strncmp(bytesPtr, "-failindex", strlen(bytesPtr))) { - /* at least two additional arguments needed */ - if (objc < i + 3) { - goto encConvFromError; - } - if (failVarObj != NULL) { - goto encConvFromError; - } - i++; - failVarObj = objv[i]; - flags = TCL_ENCODING_NOCOMPLAIN; - } else if (i == objc - 2) { - if (Tcl_GetEncodingFromObj(interp, objv[i], &encoding) != TCL_OK) { - return TCL_ERROR; - } - encodingSeen = 1; - } else { + bytesPtr = Tcl_GetString(objv[1]); + if (bytesPtr[0] == '-' && bytesPtr[1] == 'n' + && !strncmp(bytesPtr, "-nocomplain", strlen(bytesPtr))) { + flags = TCL_ENCODING_NOCOMPLAIN; + objcUnprocessed--; + } else if (bytesPtr[0] == '-' && bytesPtr[1] == 'f' + && !strncmp(bytesPtr, "-failindex", strlen(bytesPtr))) { + /* at least two additional arguments needed */ + if (objc < 4) { goto encConvFromError; } + failVarObj = objv[2]; + flags = TCL_ENCODING_NOCOMPLAIN; + objcUnprocessed -= 2; } - if (!encodingSeen) { - encoding = Tcl_GetEncoding(interp, NULL); + switch (objcUnprocessed) { + case 2: + if (Tcl_GetEncodingFromObj(interp, objv[objc - 2], &encoding) != TCL_OK) { + return TCL_ERROR; + } + break; + case 1: + encoding = Tcl_GetEncoding(interp, NULL); + break; + default: + goto encConvFromError; } } else { encConvFromError: - Tcl_WrongNumArgs(interp, 1, objv, "?-nocomplain? ?encoding? data"); + Tcl_WrongNumArgs(interp, 1, objv, "?-nocomplain|-failindex var? ?encoding? 
data"); return TCL_ERROR; } -- cgit v0.12 From 938fc2c7a0aef1a5d7655f1e0227d57b6d518f1d Mon Sep 17 00:00:00 2001 From: oehhar Date: Mon, 14 Mar 2022 17:39:22 +0000 Subject: TIP607 encoding failindex: some tests and implementation (not working) --- generic/tclCmdAH.c | 37 ++++++++++++++++++++++++------------- tests/encoding.test | 28 +++++++++++++++++++++++++++- 2 files changed, 51 insertions(+), 14 deletions(-) diff --git a/generic/tclCmdAH.c b/generic/tclCmdAH.c index 6c15630..5b95e51 100644 --- a/generic/tclCmdAH.c +++ b/generic/tclCmdAH.c @@ -555,7 +555,7 @@ EncodingConvertfromObjCmd( #else int flags = TCL_ENCODING_NOCOMPLAIN; #endif - size_t result; + size_t result, errorPosition = 0; Tcl_Obj *failVarObj = NULL; /* * Decode parameters: @@ -571,7 +571,7 @@ EncodingConvertfromObjCmd( if (objc == 2) { encoding = Tcl_GetEncoding(interp, NULL); data = objv[1]; - } else if ((unsigned)(objc - 2) < 4) { + } else if (objc > 2 && objc < 6) { int objcUnprocessed = objc; data = objv[objc - 1]; bytesPtr = Tcl_GetString(objv[1]); @@ -586,16 +586,16 @@ EncodingConvertfromObjCmd( goto encConvFromError; } failVarObj = objv[2]; - flags = TCL_ENCODING_NOCOMPLAIN; + flags = TCL_ENCODING_STOPONERROR; objcUnprocessed -= 2; } switch (objcUnprocessed) { - case 2: + case 3: if (Tcl_GetEncodingFromObj(interp, objv[objc - 2], &encoding) != TCL_OK) { return TCL_ERROR; } break; - case 1: + case 2: encoding = Tcl_GetEncoding(interp, NULL); break; default: @@ -622,14 +622,25 @@ EncodingConvertfromObjCmd( result = Tcl_ExternalToUtfDStringEx(encoding, bytesPtr, length, flags, &ds); if ((flags & TCL_ENCODING_STOPONERROR) && (result != (size_t)-1)) { - char buf[TCL_INTEGER_SPACE]; - sprintf(buf, "%" TCL_Z_MODIFIER "u", result); - Tcl_SetObjResult(interp, Tcl_ObjPrintf("unexpected byte sequence starting at index %" - TCL_Z_MODIFIER "u: '\\x%X'", result, UCHAR(bytesPtr[result]))); - Tcl_SetErrorCode(interp, "TCL", "ENCODING", "ILLEGALSEQUENCE", - buf, NULL); - Tcl_DStringFree(&ds); - return 
TCL_ERROR; + if (failVarObj != NULL) { + /* I hope, wide int will cover size_t data type */ + if (Tcl_ObjSetVar2(interp, failVarObj, NULL, Tcl_NewWideIntObj(result), TCL_LEAVE_ERR_MSG) == NULL) { + return TCL_ERROR; + } + } else { + char buf[TCL_INTEGER_SPACE]; + sprintf(buf, "%" TCL_Z_MODIFIER "u", result); + Tcl_SetObjResult(interp, Tcl_ObjPrintf("unexpected byte sequence starting at index %" + TCL_Z_MODIFIER "u: '\\x%X'", result, UCHAR(bytesPtr[result]))); + Tcl_SetErrorCode(interp, "TCL", "ENCODING", "ILLEGALSEQUENCE", + buf, NULL); + Tcl_DStringFree(&ds); + return TCL_ERROR; + } + } else if (failVarObj != NULL) { + if (Tcl_ObjSetVar2(interp, failVarObj, NULL, Tcl_NewIntObj(-1), TCL_LEAVE_ERR_MSG) == NULL) { + return TCL_ERROR; + } } /* diff --git a/tests/encoding.test b/tests/encoding.test index bf82493..7020077 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -669,10 +669,36 @@ test encoding-24.21 {Parse with -nocomplain but without providing encoding} { } 1 test encoding-24.22 {Syntax error, two encodings} -body { encoding convertfrom iso8859-1 utf-8 "ZX\uD800" -} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertfrom ?-nocomplain? ?encoding? data"} +} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertfrom ?-nocomplain|-failindex var? ?encoding? data"} test encoding-24.23 {Syntax error, two encodings} -body { encoding convertto iso8859-1 utf-8 "ZX\uD800" } -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertto ?-nocomplain? ?encoding? data"} +test encoding-24.24 {Syntax error, no parameter} -body { + encoding convertfrom +} -returnCodes 1 -result {wrong # args: should be "encoding convertfrom ?-nocomplain|-failindex var? ?encoding? data"} +test encoding-24.25 {Syntax error, -nocomplain and -failindex, no encoding} -body { + encoding convertfrom -nocomplain -failindex 2 ABC +} -returnCodes 1 -result {wrong # args: should be "encoding convertfrom ?-nocomplain|-failindex var? 
?encoding? data"} +test encoding-24.26 {Syntax error, -failindex and -nocomplain, no encoding} -body { + encoding convertfrom -failindex 2 -nocomplain ABC +} -returnCodes 1 -result {unknown encoding "-nocomplain"} +test encoding-24.27 {Syntax error, -nocomplain and -failindex, encoding} -body { + encoding convertfrom -nocomplain -failindex 2 utf-8 ABC +} -returnCodes 1 -result {wrong # args: should be "encoding convertfrom ?-nocomplain|-failindex var? ?encoding? data"} +test encoding-24.28 {Syntax error, -failindex and -nocomplain, encoding} -body { + encoding convertfrom -failindex 2 -nocomplain utf-8 ABC +} -returnCodes 1 -result {wrong # args: should be "encoding convertfrom ?-nocomplain|-failindex var? ?encoding? data"} +test encoding-24.29 {Syntax error, -failindex with no var, no encoding} -body { + encoding convertfrom -failindex ABC +} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertfrom ?-nocomplain|-failindex var? ?encoding? data"} +test encoding-24.30 {convertrom -failindex with correct data} -body { + encoding convertfrom -failindex test ABC + set test +} -returnCodes 0 -result -1 +test encoding-24.31 {convertrom -failindex with incomplete utf8} -body { + set res [encoding convertfrom -failindex test A\xc3] + lappend res $test +} -returnCodes 0 -result {A 1} file delete [file join [temporaryDirectory] iso2022.txt] -- cgit v0.12 From 1975c98a5c09989a767f8288beec0e67abf408a3 Mon Sep 17 00:00:00 2001 From: oehhar Date: Tue, 15 Mar 2022 07:09:40 +0000 Subject: TIP607 encoding failindex: test correction --- tests/encoding.test | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/encoding.test b/tests/encoding.test index 7a1e4e7..4284254 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -696,9 +696,10 @@ test encoding-24.30 {convertrom -failindex with correct data} -body { set test } -returnCodes 0 -result -1 test encoding-24.31 {convertrom -failindex with incomplete utf8} -body { - set res 
[encoding convertfrom -failindex test A\xc3] - lappend res $test -} -returnCodes 0 -result {A 1} + set x [encoding convertfrom -failindex i A\xc3] + binary scan $x H* y + list $y $i +} -returnCodes 0 -result {41 1} file delete [file join [temporaryDirectory] iso2022.txt] -- cgit v0.12 From 60b2db50fcc19a83a5c737e5cadfcbd8f7f3810b Mon Sep 17 00:00:00 2001 From: oehhar Date: Tue, 15 Mar 2022 10:15:30 +0000 Subject: win/makefile.vc: required correction for noembed,symbols to allow debugging with MS-VS2015. Otherwise, symbols are not detected within DLL. --- win/makefile.vc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/win/makefile.vc b/win/makefile.vc index 2687e1c..d15f844 100644 --- a/win/makefile.vc +++ b/win/makefile.vc @@ -584,6 +584,7 @@ $(OUT_DIR)\tommath.lib: $(TOMMATHDIR)\win64\tommath.lib $(COPY) $(TOMMATHDIR)\win64\tommath.lib $(OUT_DIR)\tommath.lib !endif +!if $(TCL_EMBED_SCRIPTS) $(TCLSCRIPTZIP): $(TCLDDELIB) $(TCLREGLIB) @echo Building Tcl library zip file @if exist "$(LIBTCLVFS)" $(RMDIR) "$(LIBTCLVFS)" @@ -603,7 +604,7 @@ $(TCLSCRIPTZIP): $(TCLDDELIB) $(TCLREGLIB) @echo file delete -force {$@} > "$(OUT_DIR)\zipper.tcl" @echo zipfs mkzip {$@} {$(LIBTCLVFS)} {$(LIBTCLVFS)} >> "$(OUT_DIR)\zipper.tcl" @cd "$(OUT_DIR)" && $(TCLSH_NATIVE) zipper.tcl - +!endif pkgs: @for /d %d in ($(PKGSDIR)\*) do \ -- cgit v0.12 From ea69616a5dd24c1d6c78e20ee260956e766342ea Mon Sep 17 00:00:00 2001 From: oehhar Date: Tue, 15 Mar 2022 10:28:48 +0000 Subject: TIP607 encoding failindex: correct test which works now. 
--- tests/encoding.test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/encoding.test b/tests/encoding.test index 4284254..f4343c4 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -696,7 +696,7 @@ test encoding-24.30 {convertrom -failindex with correct data} -body { set test } -returnCodes 0 -result -1 test encoding-24.31 {convertrom -failindex with incomplete utf8} -body { - set x [encoding convertfrom -failindex i A\xc3] + set x [encoding convertfrom -failindex i utf-8 A\xc3] binary scan $x H* y list $y $i } -returnCodes 0 -result {41 1} -- cgit v0.12 From 48a30dd04e32f374258f8f6eacc40c48a1227ee7 Mon Sep 17 00:00:00 2001 From: oehhar Date: Wed, 16 Mar 2022 18:20:29 +0000 Subject: TIP607 encoding failindex: revert checkin [add9ed8887] : just wait for symbol load in VS2015 and it will work. Sorry, Ashok ! --- win/makefile.vc | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/win/makefile.vc b/win/makefile.vc index d15f844..abbf840 100644 --- a/win/makefile.vc +++ b/win/makefile.vc @@ -52,7 +52,7 @@ # turn on the 64-bit compiler, if your SDK has it. # # Basic macros and options usable on the commandline (see rules.vc for more info): -# OPTS=nomsvcrt,noembed,nothreads,pdbs,profile,static,symbols,thrdalloc,unchecked,none +# OPTS=nomsvcrt,noembed,nothreads,pdbs,profile,static,symbols,thrdalloc,unchecked,utf16,none # Sets special options for the core. The default is for none. # Any combination of the above may be used (comma separated). # 'none' will over-ride everything to nothing.
@@ -584,7 +584,6 @@ $(OUT_DIR)\tommath.lib: $(TOMMATHDIR)\win64\tommath.lib $(COPY) $(TOMMATHDIR)\win64\tommath.lib $(OUT_DIR)\tommath.lib !endif -!if $(TCL_EMBED_SCRIPTS) $(TCLSCRIPTZIP): $(TCLDDELIB) $(TCLREGLIB) @echo Building Tcl library zip file @if exist "$(LIBTCLVFS)" $(RMDIR) "$(LIBTCLVFS)" @@ -604,7 +603,7 @@ $(TCLSCRIPTZIP): $(TCLDDELIB) $(TCLREGLIB) @echo file delete -force {$@} > "$(OUT_DIR)\zipper.tcl" @echo zipfs mkzip {$@} {$(LIBTCLVFS)} {$(LIBTCLVFS)} >> "$(OUT_DIR)\zipper.tcl" @cd "$(OUT_DIR)" && $(TCLSH_NATIVE) zipper.tcl -!endif + pkgs: @for /d %d in ($(PKGSDIR)\*) do \ -- cgit v0.12 From 190439cf96a3f4399b008c47251c4f9956c61878 Mon Sep 17 00:00:00 2001 From: oehhar Date: Thu, 17 Mar 2022 13:23:30 +0000 Subject: TIP607 encoding failindex: correct error message, test bytecompiled version --- generic/tclCmdAH.c | 2 +- tests/encoding.test | 51 ++++++++++++++++++++++++++++++++++++++++++--------- 2 files changed, 43 insertions(+), 10 deletions(-) diff --git a/generic/tclCmdAH.c b/generic/tclCmdAH.c index b152369..9772c56 100644 --- a/generic/tclCmdAH.c +++ b/generic/tclCmdAH.c @@ -467,7 +467,7 @@ EncodingConvertfromObjCmd( } } else { encConvFromError: - Tcl_WrongNumArgs(interp, 1, objv, "?-nocomplain|-failindex var? ?encoding? data"); + Tcl_WrongNumArgs(interp, 1, objv, "?-nocomplain? ?-failindex var? ?encoding? data"); return TCL_ERROR; } diff --git a/tests/encoding.test b/tests/encoding.test index f4343c4..9bd0e6b 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -669,37 +669,70 @@ test encoding-24.21 {Parse with -nocomplain but without providing encoding} { } 1 test encoding-24.22 {Syntax error, two encodings} -body { encoding convertfrom iso8859-1 utf-8 "ZX\uD800" -} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertfrom ?-nocomplain|-failindex var? ?encoding? data"} +} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertfrom ?-nocomplain? ?-failindex var? ?encoding? 
data"} test encoding-24.23 {Syntax error, two encodings} -body { encoding convertto iso8859-1 utf-8 "ZX\uD800" } -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertto ?-nocomplain? ?encoding? data"} test encoding-24.24 {Syntax error, no parameter} -body { encoding convertfrom -} -returnCodes 1 -result {wrong # args: should be "encoding convertfrom ?-nocomplain|-failindex var? ?encoding? data"} +} -returnCodes 1 -result {wrong # args: should be "encoding convertfrom ?-nocomplain? ?-failindex var? ?encoding? data"} test encoding-24.25 {Syntax error, -nocomplain and -failindex, no encoding} -body { encoding convertfrom -nocomplain -failindex 2 ABC -} -returnCodes 1 -result {wrong # args: should be "encoding convertfrom ?-nocomplain|-failindex var? ?encoding? data"} +} -returnCodes 1 -result {wrong # args: should be "encoding convertfrom ?-nocomplain? ?-failindex var? ?encoding? data"} test encoding-24.26 {Syntax error, -failindex and -nocomplain, no encoding} -body { encoding convertfrom -failindex 2 -nocomplain ABC } -returnCodes 1 -result {unknown encoding "-nocomplain"} test encoding-24.27 {Syntax error, -nocomplain and -failindex, encoding} -body { encoding convertfrom -nocomplain -failindex 2 utf-8 ABC -} -returnCodes 1 -result {wrong # args: should be "encoding convertfrom ?-nocomplain|-failindex var? ?encoding? data"} +} -returnCodes 1 -result {wrong # args: should be "encoding convertfrom ?-nocomplain? ?-failindex var? ?encoding? data"} test encoding-24.28 {Syntax error, -failindex and -nocomplain, encoding} -body { encoding convertfrom -failindex 2 -nocomplain utf-8 ABC -} -returnCodes 1 -result {wrong # args: should be "encoding convertfrom ?-nocomplain|-failindex var? ?encoding? data"} -test encoding-24.29 {Syntax error, -failindex with no var, no encoding} -body { +} -returnCodes 1 -result {wrong # args: should be "encoding convertfrom ?-nocomplain? ?-failindex var? ?encoding? 
data"} +test encoding-24.29.1 {Syntax error, -failindex with no var, no encoding} -body { encoding convertfrom -failindex ABC -} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertfrom ?-nocomplain|-failindex var? ?encoding? data"} -test encoding-24.30 {convertrom -failindex with correct data} -body { +} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertfrom ?-nocomplain? ?-failindex var? ?encoding? data"} +test encoding-24.29.2 {Syntax error, -failindex with no var, no encoding (byte compiled)} -setup { + proc encoding_test {} { + encoding convertfrom -failindex ABC + } +} -body { + # Compile and execute + encoding_test +} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertfrom ?-nocomplain? ?-failindex var? ?encoding? data"} -cleanup { + rename encoding_test "" +} +test encoding-24.30.1 {convertrom -failindex with correct data} -body { encoding convertfrom -failindex test ABC set test } -returnCodes 0 -result -1 -test encoding-24.31 {convertrom -failindex with incomplete utf8} -body { +test encoding-24.30.2 {convertrom -failindex with correct data (byt compiled)} -setup { + proc encoding_test {} { + encoding convertfrom -failindex test ABC + set test + } +} -body { + # Compile and execute + encoding_test +} -returnCodes 0 -result -1 -cleanup { + rename encoding_test "" +} +test encoding-24.31.1 {convertrom -failindex with incomplete utf8} -body { set x [encoding convertfrom -failindex i utf-8 A\xc3] binary scan $x H* y list $y $i } -returnCodes 0 -result {41 1} +test encoding-24.31.2 {convertrom -failindex with incomplete utf8 (byte compiled)} -setup { + proc encoding_test {} { + set x [encoding convertfrom -failindex i utf-8 A\xc3] + binary scan $x H* y + list $y $i + } +} -body { + # Compile and execute + encoding_test +} -returnCodes 0 -result {41 1} -cleanup { + rename encoding_test "" +} file delete [file join [temporaryDirectory] iso2022.txt] -- cgit v0.12 From 
3fb8ab60ef408acf79ad86e0ceef78d7e90650ea Mon Sep 17 00:00:00 2001 From: oehhar Date: Thu, 17 Mar 2022 16:30:59 +0000 Subject: TIP607 encoding failindex: also implement encoding convertto, move tests to cmdAH.test, as the other user interface tests (except one) are also there. --- generic/tclCmdAH.c | 89 ++++++++++++++++++++++++++------------- tests/cmdAH.test | 119 +++++++++++++++++++++++++++++++++++++++++++++++++++- tests/encoding.test | 31 +------------- 3 files changed, 180 insertions(+), 59 deletions(-) diff --git a/generic/tclCmdAH.c b/generic/tclCmdAH.c index 95ca18a..70767ae 100644 --- a/generic/tclCmdAH.c +++ b/generic/tclCmdAH.c @@ -426,8 +426,8 @@ EncodingConvertfromObjCmd( * Possible combinations: * 1) data -> objc = 2 * 2) encoding data -> objc = 3 - * 3) -nocomplain data -> objc = 3 (8.7) - * 4) -nocomplain encoding data -> objc = 4 (8.7) + * 3) -nocomplain data -> objc = 3 + * 4) -nocomplain encoding data -> objc = 4 * 5) -failindex val data -> objc = 4 * 6) -failindex val encoding data -> objc = 5 */ @@ -467,7 +467,7 @@ EncodingConvertfromObjCmd( } } else { encConvFromError: - Tcl_WrongNumArgs(interp, 1, objv, "?-nocomplain|-failindex var? ?encoding? data"); + Tcl_WrongNumArgs(interp, 1, objv, "?-nocomplain? ?-failindex var? ?encoding? 
data"); return TCL_ERROR; } @@ -544,42 +544,64 @@ EncodingConverttoObjCmd( Tcl_Encoding encoding; /* Encoding to use */ size_t length; /* Length of the string being converted */ const char *stringPtr; /* Pointer to the first byte of the string */ - size_t result; + size_t result, errorPosition = 0; + Tcl_Obj *failVarObj = NULL; #if TCL_MAJOR_VERSION > 8 || defined(TCL_NO_DEPRECATED) int flags = TCL_ENCODING_STOPONERROR; #else int flags = TCL_ENCODING_NOCOMPLAIN; #endif + /* + * Decode parameters: + * Possible combinations: + * 1) data -> objc = 2 + * 2) encoding data -> objc = 3 + * 3) -nocomplain data -> objc = 3 + * 4) -nocomplain encoding data -> objc = 4 + * 5) -failindex val data -> objc = 4 + * 6) -failindex val encoding data -> objc = 5 + */ + if (objc == 2) { encoding = Tcl_GetEncoding(interp, NULL); data = objv[1]; - } else if ((unsigned)(objc - 2) < 3) { + } else if (objc > 2 && objc < 6) { + int objcUnprocessed = objc; data = objv[objc - 1]; stringPtr = Tcl_GetString(objv[1]); if (stringPtr[0] == '-' && stringPtr[1] == 'n' && !strncmp(stringPtr, "-nocomplain", strlen(stringPtr))) { flags = TCL_ENCODING_NOCOMPLAIN; - } else if (objc < 4) { - if (Tcl_GetEncodingFromObj(interp, objv[objc - 2], &encoding) != TCL_OK) { - return TCL_ERROR; + objcUnprocessed--; + } else if (stringPtr[0] == '-' && stringPtr[1] == 'f' + && !strncmp(stringPtr, "-failindex", strlen(stringPtr))) { + /* at least two additional arguments needed */ + if (objc < 4) { + goto encConvToError; } - goto encConvToOK; - } else { - goto encConvToError; + failVarObj = objv[2]; + flags = TCL_ENCODING_STOPONERROR; + objcUnprocessed -= 2; } - if (objc < 4) { - encoding = Tcl_GetEncoding(interp, NULL); - } else if (Tcl_GetEncodingFromObj(interp, objv[objc - 2], &encoding) != TCL_OK) { - return TCL_ERROR; + switch (objcUnprocessed) { + case 3: + if (Tcl_GetEncodingFromObj(interp, objv[objc - 2], &encoding) != TCL_OK) { + return TCL_ERROR; + } + break; + case 2: + encoding = Tcl_GetEncoding(interp, 
NULL); + break; + default: + goto encConvToError; } } else { encConvToError: - Tcl_WrongNumArgs(interp, 1, objv, "?-nocomplain? ?encoding? data"); + Tcl_WrongNumArgs(interp, 1, objv, "?-nocomplain? ?-failindex var? ?encoding? data"); return TCL_ERROR; } -encConvToOK: /* * Convert the string to a byte array in 'ds' */ @@ -588,17 +610,28 @@ encConvToOK: result = Tcl_UtfToExternalDStringEx(encoding, stringPtr, length, flags, &ds); if ((flags & TCL_ENCODING_STOPONERROR) && (result != (size_t)-1)) { - size_t pos = Tcl_NumUtfChars(stringPtr, result); - int ucs4; - char buf[TCL_INTEGER_SPACE]; - TclUtfToUCS4(&stringPtr[result], &ucs4); - sprintf(buf, "%" TCL_Z_MODIFIER "u", result); - Tcl_SetObjResult(interp, Tcl_ObjPrintf("unexpected character at index %" - TCL_Z_MODIFIER "u: 'U+%06X'", pos, ucs4)); - Tcl_SetErrorCode(interp, "TCL", "ENCODING", "ILLEGALSEQUENCE", - buf, NULL); - Tcl_DStringFree(&ds); - return TCL_ERROR; + if (failVarObj != NULL) { + /* I hope, wide int will cover size_t data type */ + if (Tcl_ObjSetVar2(interp, failVarObj, NULL, Tcl_NewWideIntObj(result), TCL_LEAVE_ERR_MSG) == NULL) { + return TCL_ERROR; + } + } else { + size_t pos = Tcl_NumUtfChars(stringPtr, result); + int ucs4; + char buf[TCL_INTEGER_SPACE]; + TclUtfToUCS4(&stringPtr[result], &ucs4); + sprintf(buf, "%" TCL_Z_MODIFIER "u", result); + Tcl_SetObjResult(interp, Tcl_ObjPrintf("unexpected character at index %" + TCL_Z_MODIFIER "u: 'U+%06X'", pos, ucs4)); + Tcl_SetErrorCode(interp, "TCL", "ENCODING", "ILLEGALSEQUENCE", + buf, NULL); + Tcl_DStringFree(&ds); + return TCL_ERROR; + } + } else if (failVarObj != NULL) { + if (Tcl_ObjSetVar2(interp, failVarObj, NULL, Tcl_NewIntObj(-1), TCL_LEAVE_ERR_MSG) == NULL) { + return TCL_ERROR; + } } Tcl_SetObjResult(interp, Tcl_NewByteArrayObj((unsigned char*) Tcl_DStringValue(&ds), diff --git a/tests/cmdAH.test b/tests/cmdAH.test index d7be68b..facf67d 100644 --- a/tests/cmdAH.test +++ b/tests/cmdAH.test @@ -172,7 +172,7 @@ test cmdAH-4.2 
{Tcl_EncodingObjCmd} -returnCodes error -body { } -result {unknown or ambiguous subcommand "foo": must be convertfrom, convertto, dirs, names, or system} test cmdAH-4.3 {Tcl_EncodingObjCmd} -returnCodes error -body { encoding convertto -} -result {wrong # args: should be "encoding convertto ?-nocomplain? ?encoding? data"} +} -result {wrong # args: should be "encoding convertto ?-nocomplain? ?-failindex var? ?encoding? data"} test cmdAH-4.4 {Tcl_EncodingObjCmd} -returnCodes error -body { encoding convertto foo bar } -result {unknown encoding "foo"} @@ -194,7 +194,7 @@ test cmdAH-4.6 {Tcl_EncodingObjCmd} -setup { } -result 8C test cmdAH-4.7 {Tcl_EncodingObjCmd} -returnCodes error -body { encoding convertfrom -} -result {wrong # args: should be "encoding convertfrom ?-nocomplain|-failindex var? ?encoding? data"} +} -result {wrong # args: should be "encoding convertfrom ?-nocomplain? ?-failindex var? ?encoding? data"} test cmdAH-4.8 {Tcl_EncodingObjCmd} -returnCodes error -body { encoding convertfrom foo bar } -result {unknown encoding "foo"} @@ -229,6 +229,121 @@ test cmdAH-4.13 {Tcl_EncodingObjCmd} -setup { encoding system $system } -result iso8859-1 +test encoding-4.14.1 {Syntax error, -nocomplain and -failindex, no encoding} -body { + encoding convertfrom -nocomplain -failindex 2 ABC +} -returnCodes 1 -result {wrong # args: should be "encoding convertfrom ?-nocomplain? ?-failindex var? ?encoding? data"} +test encoding-4.14.2 {Syntax error, -nocomplain and -failindex, no encoding} -body { + encoding convertto -nocomplain -failindex 2 ABC +} -returnCodes 1 -result {wrong # args: should be "encoding convertto ?-nocomplain? ?-failindex var? ?encoding? 
data"} +test encoding-4.15.1 {Syntax error, -failindex and -nocomplain, no encoding} -body { + encoding convertfrom -failindex 2 -nocomplain ABC +} -returnCodes 1 -result {unknown encoding "-nocomplain"} +test encoding-4.15.2 {Syntax error, -failindex and -nocomplain, no encoding} -body { + encoding convertto -failindex 2 -nocomplain ABC +} -returnCodes 1 -result {unknown encoding "-nocomplain"} +test encoding-4.16.1 {Syntax error, -nocomplain and -failindex, encoding} -body { + encoding convertfrom -nocomplain -failindex 2 utf-8 ABC +} -returnCodes 1 -result {wrong # args: should be "encoding convertfrom ?-nocomplain? ?-failindex var? ?encoding? data"} +test encoding-4.16.2 {Syntax error, -nocomplain and -failindex, encoding} -body { + encoding convertto -nocomplain -failindex 2 utf-8 ABC +} -returnCodes 1 -result {wrong # args: should be "encoding convertto ?-nocomplain? ?-failindex var? ?encoding? data"} +test encoding-4.17.1 {Syntax error, -failindex and -nocomplain, encoding} -body { + encoding convertfrom -failindex 2 -nocomplain utf-8 ABC +} -returnCodes 1 -result {wrong # args: should be "encoding convertfrom ?-nocomplain? ?-failindex var? ?encoding? data"} +test encoding-4.17.2 {Syntax error, -failindex and -nocomplain, encoding} -body { + encoding convertto -failindex 2 -nocomplain utf-8 ABC +} -returnCodes 1 -result {wrong # args: should be "encoding convertto ?-nocomplain? ?-failindex var? ?encoding? data"} +test encoding-4.18.1 {Syntax error, -failindex with no var, no encoding} -body { + encoding convertfrom -failindex ABC +} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertfrom ?-nocomplain? ?-failindex var? ?encoding? 
data"} +test encoding-4.18.2 {Syntax error, -failindex with no var, no encoding (byte compiled)} -setup { + proc encoding_test {} { + encoding convertfrom -failindex ABC + } +} -body { + # Compile and execute + encoding_test +} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertfrom ?-nocomplain? ?-failindex var? ?encoding? data"} -cleanup { + rename encoding_test "" +} +test encoding-4.18.3 {Syntax error, -failindex with no var, no encoding} -body { + encoding convertto -failindex ABC +} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertto ?-nocomplain? ?-failindex var? ?encoding? data"} +test encoding-4.18.4 {Syntax error, -failindex with no var, no encoding (byte compiled)} -setup { + proc encoding_test {} { + encoding convertto -failindex ABC + } +} -body { + # Compile and execute + encoding_test +} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertto ?-nocomplain? ?-failindex var? ?encoding? data"} -cleanup { + rename encoding_test "" +} +test encoding-4.19.1 {convertrom -failindex with correct data} -body { + encoding convertfrom -failindex test ABC + set test +} -returnCodes 0 -result -1 +test encoding-4.19.2 {convertrom -failindex with correct data (byt compiled)} -setup { + proc encoding_test {} { + encoding convertfrom -failindex test ABC + set test + } +} -body { + # Compile and execute + encoding_test +} -returnCodes 0 -result -1 -cleanup { + rename encoding_test "" +} +test encoding-4.19.3 {convertrom -failindex with correct data} -body { + encoding convertto -failindex test ABC + set test +} -returnCodes 0 -result -1 +test encoding-4.19.4 {convertrom -failindex with correct data (byt compiled)} -setup { + proc encoding_test {} { + encoding convertto -failindex test ABC + set test + } +} -body { + # Compile and execute + encoding_test +} -returnCodes 0 -result -1 -cleanup { + rename encoding_test "" +} +test encoding-4.20.1 {convertrom -failindex with incomplete utf8} -body { + set x 
[encoding convertfrom -failindex i utf-8 A\xc3] + binary scan $x H* y + list $y $i +} -returnCodes 0 -result {41 1} +test encoding-4.20.2 {convertrom -failindex with incomplete utf8 (byte compiled)} -setup { + proc encoding_test {} { + set x [encoding convertfrom -failindex i utf-8 A\xc3] + binary scan $x H* y + list $y $i + } +} -body { + # Compile and execute + encoding_test +} -returnCodes 0 -result {41 1} -cleanup { + rename encoding_test "" +} +test encoding-4.21.1 {convertto -failindex with wrong character} -body { + set x [encoding convertto -failindex i iso8859-1 A\u0141] + binary scan $x H* y + list $y $i +} -returnCodes 0 -result {41 1} +test encoding-4.20.2 {convertto -failindex with wrong character (byte compiled)} -setup { + proc encoding_test {} { + set x [encoding convertto -failindex i iso8859-1 A\u0141] + binary scan $x H* y + list $y $i + } +} -body { + # Compile and execute + encoding_test +} -returnCodes 0 -result {41 1} -cleanup { + rename encoding_test "" +} + test cmdAH-5.1 {Tcl_FileObjCmd} -returnCodes error -body { file } -result {wrong # args: should be "file subcommand ?arg ...?"} diff --git a/tests/encoding.test b/tests/encoding.test index 061bc11..5c06b38 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -667,37 +667,10 @@ test encoding-24.21 {Parse with -nocomplain but without providing encoding} { } 1 test encoding-24.22 {Syntax error, two encodings} -body { encoding convertfrom iso8859-1 utf-8 "ZX\uD800" -} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertfrom ?-nocomplain|-failindex var? ?encoding? data"} +} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertfrom ?-nocomplain? ?-failindex var? ?encoding? data"} test encoding-24.23 {Syntax error, two encodings} -body { encoding convertto iso8859-1 utf-8 "ZX\uD800" -} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertto ?-nocomplain? ?encoding? 
data"} -test encoding-24.24 {Syntax error, no parameter} -body { - encoding convertfrom -} -returnCodes 1 -result {wrong # args: should be "encoding convertfrom ?-nocomplain|-failindex var? ?encoding? data"} -test encoding-24.25 {Syntax error, -nocomplain and -failindex, no encoding} -body { - encoding convertfrom -nocomplain -failindex 2 ABC -} -returnCodes 1 -result {wrong # args: should be "encoding convertfrom ?-nocomplain|-failindex var? ?encoding? data"} -test encoding-24.26 {Syntax error, -failindex and -nocomplain, no encoding} -body { - encoding convertfrom -failindex 2 -nocomplain ABC -} -returnCodes 1 -result {unknown encoding "-nocomplain"} -test encoding-24.27 {Syntax error, -nocomplain and -failindex, encoding} -body { - encoding convertfrom -nocomplain -failindex 2 utf-8 ABC -} -returnCodes 1 -result {wrong # args: should be "encoding convertfrom ?-nocomplain|-failindex var? ?encoding? data"} -test encoding-24.28 {Syntax error, -failindex and -nocomplain, encoding} -body { - encoding convertfrom -failindex 2 -nocomplain utf-8 ABC -} -returnCodes 1 -result {wrong # args: should be "encoding convertfrom ?-nocomplain|-failindex var? ?encoding? data"} -test encoding-24.29 {Syntax error, -failindex with no var, no encoding} -body { - encoding convertfrom -failindex ABC -} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertfrom ?-nocomplain|-failindex var? ?encoding? data"} -test encoding-24.30 {convertrom -failindex with correct data} -body { - encoding convertfrom -failindex test ABC - set test -} -returnCodes 0 -result -1 -test encoding-24.31 {convertrom -failindex with incomplete utf8} -body { - set x [encoding convertfrom -failindex i utf-8 A\xc3] - binary scan $x H* y - list $y $i -} -returnCodes 0 -result {41 1} +} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertto ?-nocomplain? ?-failindex var? ?encoding? 
data"} file delete [file join [temporaryDirectory] iso2022.txt] -- cgit v0.12 From ce98d31d01017d2ce8876a1df05eb0d0cf98c0c9 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Thu, 17 Mar 2022 17:52:46 +0000 Subject: Eliminate "deprecated" constraint: doens't exist in 9.0 any more. Also remove unused variable --- generic/tclCmdAH.c | 2 +- tests/http.test | 2 +- tests/main.test | 2 +- tests/safe.test | 4 ++-- tests/source.test | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/generic/tclCmdAH.c b/generic/tclCmdAH.c index 49c7d05..597bb3b 100644 --- a/generic/tclCmdAH.c +++ b/generic/tclCmdAH.c @@ -539,7 +539,7 @@ EncodingConverttoObjCmd( Tcl_Encoding encoding; /* Encoding to use */ size_t length; /* Length of the string being converted */ const char *stringPtr; /* Pointer to the first byte of the string */ - size_t result, errorPosition = 0; + size_t result; Tcl_Obj *failVarObj = NULL; int flags = 0; diff --git a/tests/http.test b/tests/http.test index e09992d..3b2963e 100644 --- a/tests/http.test +++ b/tests/http.test @@ -661,7 +661,7 @@ test http-7.3 {http::formatQuery} -setup { } -cleanup { http::config -urlencoding $enc } -result "can't read \"formMap(∈)\": no such element in array" -test http-7.4 {http::formatQuery} -constraints deprecated -setup { +test http-7.4 {http::formatQuery} -setup { set enc [http::config -urlencoding] } -body { http::config -urlencoding "iso8859-1" diff --git a/tests/main.test b/tests/main.test index 47b2f1a..4aadd79 100644 --- a/tests/main.test +++ b/tests/main.test @@ -143,7 +143,7 @@ namespace eval ::tcl::test::main { test Tcl_Main-1.8 { Tcl_Main: startup script - -encoding option - mismatched encodings } -constraints { - stdio deprecated + stdio } -setup { set script [makeFile {} script] file delete $script diff --git a/tests/safe.test b/tests/safe.test index d93cb6b..76aeb41 100644 --- a/tests/safe.test +++ b/tests/safe.test @@ -1269,7 +1269,7 @@ test safe-11.7 {testing safe encoding} -setup { interp eval $i 
encoding convertfrom } -returnCodes error -cleanup { safe::interpDelete $i -} -result {wrong # args: should be "encoding convertfrom ?-nocomplain|-failindex var? ?encoding? data"} +} -result {wrong # args: should be "encoding convertfrom ?-nocomplain? ?-failindex var? ?encoding? data"} test safe-11.7.1 {testing safe encoding} -setup { set i [safe::interpCreate] } -body { @@ -1278,7 +1278,7 @@ test safe-11.7.1 {testing safe encoding} -setup { } -returnCodes ok -match glob -cleanup { unset -nocomplain m o safe::interpDelete $i -} -result {wrong # args: should be "encoding convertfrom ?-nocomplain|-failindex var? ?encoding? data" +} -result {wrong # args: should be "encoding convertfrom ?-nocomplain? ?-failindex var? ?encoding? data" while executing "encoding convertfrom" invoked from within diff --git a/tests/source.test b/tests/source.test index 0a9a49f..98aaee2 100644 --- a/tests/source.test +++ b/tests/source.test @@ -275,7 +275,7 @@ test source-7.5 {source -encoding: correct operation} -setup { removeFile source.file rename € {} } -result foo -test source-7.6 {source -encoding: mismatch encoding error} -constraints deprecated -setup { +test source-7.6 {source -encoding: mismatch encoding error} -setup { set sourcefile [makeFile {} source.file] file delete $sourcefile set f [open $sourcefile w] -- cgit v0.12 From ff13acf40513006ce3d0e56049498e5b11cf95bd Mon Sep 17 00:00:00 2001 From: oehhar Date: Thu, 17 Mar 2022 19:48:40 +0000 Subject: TIP607 encoding failindex: user documentation --- doc/encoding.n | 75 +++++++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 59 insertions(+), 16 deletions(-) diff --git a/doc/encoding.n b/doc/encoding.n index e78a8e7..2277f9d 100644 --- a/doc/encoding.n +++ b/doc/encoding.n @@ -14,16 +14,10 @@ encoding \- Manipulate encodings .BE .SH INTRODUCTION .PP -Strings in Tcl are logically a sequence of 16-bit Unicode characters. +Strings in Tcl are logically a sequence of Unicode characters. 
These strings are represented in memory as a sequence of bytes that -may be in one of several encodings: modified UTF\-8 (which uses 1 to 3 -bytes per character), 16-bit -.QW Unicode -(which uses 2 bytes per character, with an endianness that is -dependent on the host architecture), and binary (which uses a single -byte per character but only handles a restricted range of characters). -Tcl does not guarantee to always use the same encoding for the same -string. +may be in one of several encodings: modified UTF\-8 (which uses 1 to 4 +bytes per character), or a custom encoding start as 8 bit binary data. .PP Different operating system interfaces or applications may generate strings in other encodings such as Shift\-JIS. The \fBencoding\fR @@ -34,16 +28,30 @@ formats. Performs one of several encoding related operations, depending on \fIoption\fR. The legal \fIoption\fRs are: .TP -\fBencoding convertfrom\fR ?\fIencoding\fR? \fIdata\fR +\fBencoding convertfrom\fR ?\fB-nocomplain\fR? ?\fB-failindex var\fR? +?\fIencoding\fR? \fIdata\fR . -Convert \fIdata\fR to Unicode from the specified \fIencoding\fR. The -characters in \fIdata\fR are treated as binary data where the lower -8-bits of each character is taken as a single byte. The resulting -sequence of bytes is treated as a string in the specified -\fIencoding\fR. If \fIencoding\fR is not specified, the current +Convert \fIdata\fR to a Unicode string from the specified \fIencoding\fR. The +characters in \fIdata\fR are 8 bit binary data. The resulting +sequence of bytes is a string created by applying the given \fIencoding\fR +to the data. If \fIencoding\fR is not specified, the current system encoding is used. +. +The call fails on conversion errors, like an incomplete utf-8 sequence. +The option \fB-failindex\fR is followed by a variable name. The variable +is set to \fI-1\fR if no conversion error occurred. It is set to the +first error location in \fIdata\fR in case of a conversion error. 
All data +until this error location is transformed and returned. This option may not +be used together with \fB-nocomplain\fR. +. +The call does not fail on conversion errors, if the option +\fB-nocomplain\fR is given. In this case, any error locations are replaced +by \fB?\fR. Incomplete sequences are written verbatim to the output string. +The purpose of this switch is to provide compatibility with prior versions of Tcl. +It is not recommended for any other usage. .TP -\fBencoding convertto\fR ?\fIencoding\fR? \fIstring\fR +\fBencoding convertto\fR ?\fB-nocomplain\fR? ?\fB-failindex var\fR? +?\fIencoding\fR? \fIstring\fR . Convert \fIstring\fR from Unicode to the specified \fIencoding\fR. The result is a sequence of bytes that represents the converted @@ -51,6 +59,21 @@ string. Each byte is stored in the lower 8-bits of a Unicode character (indeed, the resulting string is a binary string as far as Tcl is concerned, at least initially). If \fIencoding\fR is not specified, the current system encoding is used. +. +The call fails on conversion errors, like a Unicode character not representable +in the given \fIencoding\fR. +. +The option \fB-failindex\fR is followed by a variable name. The variable +is set to \fI-1\fR if no conversion error occurred. It is set to the +first error location in \fIstring\fR in case of a conversion error. All data +until this error location is transformed and returned. This option may not +be used together with \fB-nocomplain\fR. +. +The call does not fail on conversion errors, if the option +\fB-nocomplain\fR is given. In this case, any error locations are replaced +by \fB?\fR. Incomplete sequences are written verbatim to the output string. +The purpose of this switch is to provide compatibility with prior versions of Tcl. +It is not recommended for any other usage. .TP \fBencoding dirs\fR ?\fIdirectoryList\fR? . 
@@ -90,6 +113,26 @@ set s [\fBencoding convertfrom\fR euc-jp "\exA4\exCF"] The result is the unicode codepoint: .QW "\eu306F" , which is the Hiragana letter HA. +.PP +The following example detects the error location in an incomplete UTF-8 sequence: +.PP +.CS +% set s [\fBencoding convertfrom\fR -failindex i utf-8 "A\exc3"] +A +% set i +1 +.CE +.PP +The following example detects the error location while transforming to ISO8859-1 +(ISO-Latin 1): +.PP +.CS +% set s [\fBencoding convertto\fR -failindex i iso8859-1 "A\eu0141"] +A +% set i +1 +.CE +.PP .SH "SEE ALSO" Tcl_GetEncoding(3) .SH KEYWORDS -- cgit v0.12