From fa1e5ce70ab2eb900b31b03f0fddf2cc8c5243e8 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Tue, 23 Mar 2021 20:35:29 +0000 Subject: New Tcl_ExternalToUtfDStringEx/Tcl_UtfToExternalDStringEx functions. Not used yet --- generic/tcl.decls | 10 ++++++++++ generic/tclDecls.h | 14 ++++++++++++++ generic/tclEncoding.c | 53 ++++++++++++++++++++++++++++++++++++++++----------- generic/tclStubInit.c | 2 ++ 4 files changed, 68 insertions(+), 11 deletions(-) diff --git a/generic/tcl.decls b/generic/tcl.decls index c39847b..c2a4abd 100644 --- a/generic/tcl.decls +++ b/generic/tcl.decls @@ -2424,6 +2424,16 @@ declare 656 { const char *Tcl_UtfPrev(const char *src, const char *start) } +declare 657 { + int Tcl_ExternalToUtfDStringEx(Tcl_Encoding encoding, + const char *src, int srcLen, Tcl_DString *dsPtr, int flags) +} +declare 658 { + int Tcl_UtfToExternalDStringEx(Tcl_Encoding encoding, + const char *src, int srcLen, Tcl_DString *dsPtr, int flags) +} + + # ----- BASELINE -- FOR -- 8.7.0 ----- # ############################################################################## diff --git a/generic/tclDecls.h b/generic/tclDecls.h index e509c2b..6ba39d5 100644 --- a/generic/tclDecls.h +++ b/generic/tclDecls.h @@ -1937,6 +1937,14 @@ EXTERN int Tcl_UtfCharComplete(const char *src, int length); EXTERN const char * Tcl_UtfNext(const char *src); /* 656 */ EXTERN const char * Tcl_UtfPrev(const char *src, const char *start); +/* 657 */ +EXTERN int Tcl_ExternalToUtfDStringEx(Tcl_Encoding encoding, + const char *src, int srcLen, + Tcl_DString *dsPtr, int flags); +/* 658 */ +EXTERN int Tcl_UtfToExternalDStringEx(Tcl_Encoding encoding, + const char *src, int srcLen, + Tcl_DString *dsPtr, int flags); typedef struct { const struct TclPlatStubs *tclPlatStubs; @@ -2629,6 +2637,8 @@ typedef struct TclStubs { int (*tcl_UtfCharComplete) (const char *src, int length); /* 654 */ const char * (*tcl_UtfNext) (const char *src); /* 655 */ const char * (*tcl_UtfPrev) (const char *src, const char *start); /* 656 */ + int (*tcl_ExternalToUtfDStringEx) (Tcl_Encoding encoding, const char *src, int srcLen, Tcl_DString *dsPtr, int flags); /* 657 */ + int (*tcl_UtfToExternalDStringEx) (Tcl_Encoding encoding, const char *src, int srcLen, Tcl_DString *dsPtr, int flags); /* 658 */ } TclStubs; extern const TclStubs *tclStubsPtr; @@ -3971,6 +3981,10 @@ extern const TclStubs *tclStubsPtr; (tclStubsPtr->tcl_UtfNext) /* 655 */ #define Tcl_UtfPrev \ (tclStubsPtr->tcl_UtfPrev) /* 656 */ +#define Tcl_ExternalToUtfDStringEx \ + (tclStubsPtr->tcl_ExternalToUtfDStringEx) /* 657 */ +#define Tcl_UtfToExternalDStringEx \ + (tclStubsPtr->tcl_UtfToExternalDStringEx) /* 658 */ #endif /* defined(USE_TCL_STUBS) */ diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 4eabbda..fd5c52b 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -1089,7 +1089,7 @@ Tcl_CreateEncoding( /* *------------------------------------------------------------------------- * - * Tcl_ExternalToUtfDString -- + * Tcl_ExternalToUtfDString/Tcl_ExternalToUtfDStringEx -- * * Convert a source buffer from the specified encoding into UTF-8. If any * of the bytes in the source buffer are invalid or cannot be represented @@ -1099,7 +1099,7 @@ Tcl_CreateEncoding( * Results: * The converted bytes are stored in the DString, which is then NULL * terminated. The return value is a pointer to the value stored in the - * DString. + * DString resp. an error code. * * Side effects: * None. @@ -1117,10 +1117,26 @@ Tcl_ExternalToUtfDString( Tcl_DString *dstPtr) /* Uninitialized or free DString in which the * converted string is stored. */ { + Tcl_ExternalToUtfDStringEx(encoding, src, srcLen, dstPtr, 0); + return Tcl_DStringValue(dstPtr); +} + + +int +Tcl_ExternalToUtfDStringEx( + Tcl_Encoding encoding, /* The encoding for the source string, or NULL + * for the default system encoding. */ + const char *src, /* Source string in specified encoding. */ + int srcLen, /* Source string length in bytes, or < 0 for + * encoding-specific string length. */ + Tcl_DString *dstPtr, /* Uninitialized or free DString in which the + * converted string is stored. */ + int flags) /* Conversion control flags. */ +{ char *dst; Tcl_EncodingState state; const Encoding *encodingPtr; - int flags, dstLen, result, soFar, srcRead, dstWrote, dstChars; + int dstLen, result, soFar, srcRead, dstWrote, dstChars; Tcl_DStringInit(dstPtr); dst = Tcl_DStringValue(dstPtr); @@ -1137,7 +1153,7 @@ Tcl_ExternalToUtfDString( srcLen = encodingPtr->lengthProc(src); } - flags = TCL_ENCODING_START | TCL_ENCODING_END; + flags |= TCL_ENCODING_START | TCL_ENCODING_END; while (1) { result = encodingPtr->toUtfProc(encodingPtr->clientData, src, srcLen, @@ -1146,7 +1162,7 @@ Tcl_ExternalToUtfDString( if (result != TCL_CONVERT_NOSPACE) { Tcl_DStringSetLength(dstPtr, soFar); - return Tcl_DStringValue(dstPtr); + return result; } flags &= ~TCL_ENCODING_START; @@ -1279,7 +1295,7 @@ Tcl_ExternalToUtf( /* *------------------------------------------------------------------------- * - * Tcl_UtfToExternalDString -- + * Tcl_UtfToExternalDString/Tcl_UtfToExternalDStringEx -- * * Convert a source buffer from UTF-8 to the specified encoding. If any * of the bytes in the source buffer are invalid or cannot be represented @@ -1288,7 +1304,7 @@ Tcl_ExternalToUtf( * Results: * The converted bytes are stored in the DString, which is then NULL * terminated in an encoding-specific manner. The return value is a - * pointer to the value stored in the DString. + * pointer to the value stored in the DString resp. an error code. * * Side effects: * None. @@ -1306,10 +1322,25 @@ Tcl_UtfToExternalDString( Tcl_DString *dstPtr) /* Uninitialized or free DString in which the * converted string is stored. */ { + Tcl_UtfToExternalDStringEx(encoding, src, srcLen, dstPtr, 0); + return Tcl_DStringValue(dstPtr); +} + +int +Tcl_UtfToExternalDStringEx( + Tcl_Encoding encoding, /* The encoding for the converted string, or + * NULL for the default system encoding. */ + const char *src, /* Source string in UTF-8. */ + int srcLen, /* Source string length in bytes, or < 0 for + * strlen(). */ + Tcl_DString *dstPtr, /* Uninitialized or free DString in which the + * converted string is stored. */ + int flags) /* Conversion control flags. */ +{ char *dst; Tcl_EncodingState state; const Encoding *encodingPtr; - int flags, dstLen, result, soFar, srcRead, dstWrote, dstChars; + int dstLen, result, soFar, srcRead, dstWrote, dstChars; Tcl_DStringInit(dstPtr); dst = Tcl_DStringValue(dstPtr); @@ -1325,10 +1356,10 @@ Tcl_UtfToExternalDString( } else if (srcLen < 0) { srcLen = strlen(src); } - flags = TCL_ENCODING_START | TCL_ENCODING_END; + flags |= TCL_ENCODING_START | TCL_ENCODING_END | TCL_ENCODING_EXTERNAL; while (1) { result = encodingPtr->fromUtfProc(encodingPtr->clientData, src, - srcLen, flags | TCL_ENCODING_EXTERNAL, &state, dst, dstLen, + srcLen, flags, &state, dst, dstLen, &srcRead, &dstWrote, &dstChars); soFar = dst + dstWrote - Tcl_DStringValue(dstPtr); @@ -1337,7 +1368,7 @@ Tcl_UtfToExternalDString( Tcl_DStringSetLength(dstPtr, soFar + 1); } Tcl_DStringSetLength(dstPtr, soFar); - return Tcl_DStringValue(dstPtr); + return result; } flags &= ~TCL_ENCODING_START; diff --git a/generic/tclStubInit.c b/generic/tclStubInit.c index b66af58..0473bb1 100644 --- a/generic/tclStubInit.c +++ b/generic/tclStubInit.c @@ -1932,6 +1932,8 @@ const TclStubs tclStubs = { Tcl_UtfCharComplete, /* 654 */ Tcl_UtfNext, /* 655 */ Tcl_UtfPrev, /* 656 */ + Tcl_ExternalToUtfDStringEx, /* 657 */ + Tcl_UtfToExternalDStringEx, /* 658 */ }; /* !END!: Do not edit above this line. */ -- cgit v0.12 From 9fb8027cf65024e499873614e710122af9044cf0 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Fri, 26 Mar 2021 16:47:51 +0000 Subject: More WIP: Add -stoponerror flag to "encoding convertfrom/converto" --- generic/tclCmdAH.c | 48 ++++++++++++++++++++++++++++++++++++++++-------- tests/cmdAH.test | 4 ++-- tests/encoding.test | 15 +++++++++++++++ tests/safe.test | 8 ++++---- 4 files changed, 61 insertions(+), 14 deletions(-) diff --git a/generic/tclCmdAH.c b/generic/tclCmdAH.c index c09ad95..ee329ec 100644 --- a/generic/tclCmdAH.c +++ b/generic/tclCmdAH.c @@ -513,8 +513,8 @@ TclInitEncodingCmd( Tcl_Interp* interp) /* Tcl interpreter */ { static const EnsembleImplMap encodingImplMap[] = { - {"convertfrom", EncodingConvertfromObjCmd, TclCompileBasic1Or2ArgCmd, NULL, NULL, 0}, - {"convertto", EncodingConverttoObjCmd, TclCompileBasic1Or2ArgCmd, NULL, NULL, 0}, + {"convertfrom", EncodingConvertfromObjCmd, TclCompileBasic1To3ArgCmd, NULL, NULL, 0}, + {"convertto", EncodingConverttoObjCmd, TclCompileBasic1To3ArgCmd, NULL, NULL, 0}, {"dirs", EncodingDirsObjCmd, TclCompileBasic0Or1ArgCmd, NULL, NULL, 1}, {"names", EncodingNamesObjCmd, TclCompileBasic0ArgCmd, NULL, NULL, 0}, {"system", EncodingSystemObjCmd, TclCompileBasic0Or1ArgCmd, NULL, NULL, 1}, @@ -550,17 +550,27 @@ EncodingConvertfromObjCmd( Tcl_Encoding encoding; /* Encoding to use */ int length; /* Length of the byte array being converted */ const char *bytesPtr; /* Pointer to the first byte of the array */ + const char *stopOnError = NULL; + int result; if (objc == 2) { encoding = Tcl_GetEncoding(interp, NULL); data = objv[1]; - } else if (objc == 3) { + } else if ((unsigned)(objc - 3) < 2) { if (Tcl_GetEncodingFromObj(interp, objv[1], &encoding) != TCL_OK) { return TCL_ERROR; } data = objv[2]; + if (objc > 3) { + stopOnError = Tcl_GetString(objv[3]); + if (stopOnError[0] != '-' || stopOnError[1] != 's' + || strncmp(stopOnError, "-stoponerror", strlen(stopOnError))) { + goto encConvFromError; + } + } } else { - Tcl_WrongNumArgs(interp, 1, objv, "?encoding? data"); + encConvFromError: + Tcl_WrongNumArgs(interp, 1, objv, "?encoding? data ?-stoponerror?"); return TCL_ERROR; } @@ -568,7 +578,13 @@ EncodingConvertfromObjCmd( * Convert the string into a byte array in 'ds' */ bytesPtr = (char *) Tcl_GetByteArrayFromObj(data, &length); - Tcl_ExternalToUtfDString(encoding, bytesPtr, length, &ds); + result = Tcl_ExternalToUtfDStringEx(encoding, bytesPtr, length, &ds, + stopOnError ? TCL_ENCODING_STOPONERROR : 0); + if (stopOnError && (result != TCL_OK)) { + Tcl_SetObjResult(interp, Tcl_ObjPrintf("encoding error after producing %d characters", Tcl_DStringLength(&ds))); + Tcl_DStringFree(&ds); + return TCL_ERROR; + } /* * Note that we cannot use Tcl_DStringResult here because it will @@ -612,19 +628,29 @@ EncodingConverttoObjCmd( Tcl_Encoding encoding; /* Encoding to use */ int length; /* Length of the string being converted */ const char *stringPtr; /* Pointer to the first byte of the string */ + int result; + const char *stopOnError = NULL; /* TODO - ADJUST OBJ INDICES WHEN ENSEMBLIFYING THIS */ if (objc == 2) { encoding = Tcl_GetEncoding(interp, NULL); data = objv[1]; - } else if (objc == 3) { + } else if ((unsigned)(objc - 3) < 2) { if (Tcl_GetEncodingFromObj(interp, objv[1], &encoding) != TCL_OK) { return TCL_ERROR; } data = objv[2]; + if (objc > 3) { + stopOnError = Tcl_GetString(objv[3]); + if (stopOnError[0] != '-' || stopOnError[1] != 's' + || strncmp(stopOnError, "-stoponerror", strlen(stopOnError))) { + goto encConvToError; + } + } } else { - Tcl_WrongNumArgs(interp, 1, objv, "?encoding? data"); + encConvToError: + Tcl_WrongNumArgs(interp, 1, objv, "?encoding? data ?-stoponerror?"); return TCL_ERROR; } @@ -633,7 +659,13 @@ EncodingConverttoObjCmd( */ stringPtr = TclGetStringFromObj(data, &length); - Tcl_UtfToExternalDString(encoding, stringPtr, length, &ds); + result = Tcl_UtfToExternalDStringEx(encoding, stringPtr, length, &ds, + stopOnError ? TCL_ENCODING_STOPONERROR : 0); + if (stopOnError && (result != TCL_OK)) { + Tcl_SetObjResult(interp, Tcl_ObjPrintf("encoding error after producing %d bytes", Tcl_DStringLength(&ds))); + Tcl_DStringFree(&ds); + return TCL_ERROR; + } Tcl_SetObjResult(interp, Tcl_NewByteArrayObj((unsigned char*) Tcl_DStringValue(&ds), Tcl_DStringLength(&ds))); diff --git a/tests/cmdAH.test b/tests/cmdAH.test index baa148e..29adeae 100644 --- a/tests/cmdAH.test +++ b/tests/cmdAH.test @@ -178,7 +178,7 @@ test cmdAH-4.2 {Tcl_EncodingObjCmd} -returnCodes error -body { } -result {unknown or ambiguous subcommand "foo": must be convertfrom, convertto, dirs, names, or system} test cmdAH-4.3 {Tcl_EncodingObjCmd} -returnCodes error -body { encoding convertto -} -result {wrong # args: should be "encoding convertto ?encoding? data"} +} -result {wrong # args: should be "encoding convertto ?encoding? data ?-stoponerror?"} test cmdAH-4.4 {Tcl_EncodingObjCmd} -returnCodes error -body { encoding convertto foo bar } -result {unknown encoding "foo"} @@ -200,7 +200,7 @@ test cmdAH-4.6 {Tcl_EncodingObjCmd} -setup { } -result 8C test cmdAH-4.7 {Tcl_EncodingObjCmd} -returnCodes error -body { encoding convertfrom -} -result {wrong # args: should be "encoding convertfrom ?encoding? data"} +} -result {wrong # args: should be "encoding convertfrom ?encoding? data ?-stoponerror?"} test cmdAH-4.8 {Tcl_EncodingObjCmd} -returnCodes error -body { encoding convertfrom foo bar } -result {unknown encoding "foo"} diff --git a/tests/encoding.test b/tests/encoding.test index b1150c6..f881d4f 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -580,6 +580,21 @@ test encoding-24.10 {Parse valid or invalid utf-8} { test encoding-24.11 {Parse valid or invalid utf-8} { string length [encoding convertfrom utf-8 "\xEF\xBF\xBF"] } 1 +test encoding-24.12 {Parse valid or invalid utf-8} { + string length [encoding convertfrom utf-8 "\xC0\x80" -stoponerror] +} 1 +test encoding-24.13 {Parse valid or invalid utf-8} -body { + encoding convertfrom utf-8 "\xC0\x81" -stoponerror +} -returnCodes 1 -result {encoding error after producing 0 characters} +test encoding-24.14 {Parse valid or invalid utf-8} -body { + encoding convertfrom utf-8 "\xC1\xBF" -stoponerror +} -returnCodes 1 -result {encoding error after producing 0 characters} +test encoding-24.15 {Parse valid or invalid utf-8} { + string length [encoding convertfrom utf-8 "\xC2\x80" -stoponerror] +} 1 +test encoding-24.16 {Parse valid or invalid utf-8} -body { + encoding convertfrom utf-8 "Z\xE0\x80" -stoponerror +} -returnCodes 1 -result {encoding error after producing 1 characters} file delete [file join [temporaryDirectory] iso2022.txt] diff --git a/tests/safe.test b/tests/safe.test index 8fca594..e2a9b83 100644 --- a/tests/safe.test +++ b/tests/safe.test @@ -1267,7 +1267,7 @@ test safe-11.7 {testing safe encoding} -setup { interp eval $i encoding convertfrom } -returnCodes error -cleanup { safe::interpDelete $i -} -result {wrong # args: should be "encoding convertfrom ?encoding? data"} +} -result {wrong # args: should be "encoding convertfrom ?encoding? data ?-stoponerror?"} test safe-11.7.1 {testing safe encoding} -setup { set i [safe::interpCreate] } -body { @@ -1276,7 +1276,7 @@ test safe-11.7.1 {testing safe encoding} -setup { } -returnCodes ok -match glob -cleanup { unset -nocomplain m o safe::interpDelete $i -} -result {wrong # args: should be "encoding convertfrom ?encoding? data" +} -result {wrong # args: should be "encoding convertfrom ?encoding? data ?-stoponerror?" while executing "encoding convertfrom" invoked from within @@ -1289,7 +1289,7 @@ test safe-11.8 {testing safe encoding} -setup { interp eval $i encoding convertto } -returnCodes error -cleanup { safe::interpDelete $i -} -result {wrong # args: should be "encoding convertto ?encoding? data"} +} -result {wrong # args: should be "encoding convertto ?encoding? data ?-stoponerror?"} test safe-11.8.1 {testing safe encoding} -setup { set i [safe::interpCreate] } -body { @@ -1298,7 +1298,7 @@ test safe-11.8.1 {testing safe encoding} -setup { } -returnCodes ok -match glob -cleanup { unset -nocomplain m o safe::interpDelete $i -} -result {wrong # args: should be "encoding convertto ?encoding? data" +} -result {wrong # args: should be "encoding convertto ?encoding? data ?-stoponerror?" while executing "encoding convertto" invoked from within -- cgit v0.12 From 664b7500abd51bfa6257c7e3e8fc5846d18d522b Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Tue, 30 Mar 2021 11:26:36 +0000 Subject: Add documentation. Do a better job of counting exactly which byte/character caused the encoding/decoding error --- doc/Encoding.3 | 20 +++++++++++++++++++- generic/tcl.decls | 8 ++++---- generic/tcl.h | 5 +++++ generic/tclCmdAH.c | 31 +++++++++++++++++++------------ generic/tclDecls.h | 16 ++++++++-------- generic/tclEncoding.c | 30 +++++++++++++++--------------- tests/encoding.test | 6 +++--- 7 files changed, 73 insertions(+), 43 deletions(-) diff --git a/doc/Encoding.3 b/doc/Encoding.3 index 2d2461e..c33878a 100644 --- a/doc/Encoding.3 +++ b/doc/Encoding.3 @@ -26,8 +26,14 @@ char * \fBTcl_ExternalToUtfDString\fR(\fIencoding, src, srcLen, dstPtr\fR) .sp char * +\fBTcl_ExternalToUtfDStringEx\fR(\fIencoding, src, srcLen, flags, dstPtr\fR) +.sp +char * \fBTcl_UtfToExternalDString\fR(\fIencoding, src, srcLen, dstPtr\fR) .sp +char * +\fBTcl_UtfToExternalDStringEx\fR(\fIencoding, src, srcLen, flags, dstPtr\fR) +.sp int \fBTcl_ExternalToUtf\fR(\fIinterp, encoding, src, srcLen, flags, statePtr, dst, dstLen, srcReadPtr, dstWrotePtr, dstCharsPtr\fR) @@ -108,7 +114,9 @@ byte is converted and then to reset to an initial state. \fBTCL_ENCODING_STOPONERROR\fR signifies that the conversion routine should return immediately upon reading a source character that does not exist in the target encoding; otherwise a default fallback character will -automatically be substituted. +automatically be substituted. The flag \fBTCL_ENCODING_MODIFIED\fR makes +\fBTcl_UtfToExternalDStringEx\fR and \fBTcl_UtfToExternal\fR produce the +byte sequence \exC0\ex80 in stead of \ex00, for the utf-8/wtf-8/cesu-8 encoders. .AP Tcl_EncodingState *statePtr in/out Used when converting a (generally long or indefinite length) byte stream in a piece-by-piece fashion. The conversion routine stores its current @@ -208,6 +216,11 @@ When converting, if any of the characters in the source buffer cannot be represented in the target encoding, a default fallback character will be used. The return value is a pointer to the value stored in the DString. .PP +\fBTcl_ExternalToUtfDStringEx\fR is the same as \fBTcl_ExternalToUtfDString\fR, +but it has an additional flags parameter. The return value is the index of +the first byte in the input string causing a conversion error. +Or (size_t)-1 if all is OK. +.PP \fBTcl_ExternalToUtf\fR converts a source buffer \fIsrc\fR from the specified \fIencoding\fR into UTF-8. Up to \fIsrcLen\fR bytes are converted from the source buffer and up to \fIdstLen\fR converted bytes are stored in \fIdst\fR. @@ -246,6 +259,11 @@ characters in the source buffer cannot be represented in the target encoding, a default fallback character will be used. The return value is a pointer to the value stored in the DString. .PP +\fBTcl_UtfToExternalDStringEx\fR is the same as \fBTcl_UtfToExternalDString\fR, +but it has an additional flags parameter. The return value is the index of +the first byte in the input string causing a conversion error. +Or (size_t)-1 if all is OK. +.PP \fBTcl_UtfToExternal\fR converts a source buffer \fIsrc\fR from UTF-8 into the specified \fIencoding\fR. Up to \fIsrcLen\fR bytes are converted from the source buffer and up to \fIdstLen\fR converted bytes are stored in diff --git a/generic/tcl.decls b/generic/tcl.decls index c2a4abd..8cd5bc9 100644 --- a/generic/tcl.decls +++ b/generic/tcl.decls @@ -2425,12 +2425,12 @@ declare 656 { } declare 657 { - int Tcl_ExternalToUtfDStringEx(Tcl_Encoding encoding, - const char *src, int srcLen, Tcl_DString *dsPtr, int flags) + size_t Tcl_ExternalToUtfDStringEx(Tcl_Encoding encoding, + const char *src, int srcLen, int flags, Tcl_DString *dsPtr) } declare 658 { - int Tcl_UtfToExternalDStringEx(Tcl_Encoding encoding, - const char *src, int srcLen, Tcl_DString *dsPtr, int flags) + size_t Tcl_UtfToExternalDStringEx(Tcl_Encoding encoding, + const char *src, int srcLen, int flags, Tcl_DString *dsPtr) } diff --git a/generic/tcl.h b/generic/tcl.h index 38dda28..f783f4f 100644 --- a/generic/tcl.h +++ b/generic/tcl.h @@ -2069,6 +2069,10 @@ typedef struct Tcl_EncodingType { * content. Otherwise, the number of chars * produced is controlled only by other limiting * factors. + * TCL_ENCODING_MODIFIED - Convert NULL bytes to \xC0\x80 in stead of + * 0x00. Only valid for "utf-8", "wtf-8 and "cesu-8". + * This flag is implicit for external -> internal conversions, + * optional for internal -> external conversions. */ #define TCL_ENCODING_START 0x01 @@ -2076,6 +2080,7 @@ typedef struct Tcl_EncodingType { #define TCL_ENCODING_STOPONERROR 0x04 #define TCL_ENCODING_NO_TERMINATE 0x08 #define TCL_ENCODING_CHAR_LIMIT 0x10 +#define TCL_ENCODING_MODIFIED 0x20 /* * The following definitions are the error codes returned by the conversion diff --git a/generic/tclCmdAH.c b/generic/tclCmdAH.c index ee329ec..cd77e06 100644 --- a/generic/tclCmdAH.c +++ b/generic/tclCmdAH.c @@ -551,7 +551,7 @@ EncodingConvertfromObjCmd( int length; /* Length of the byte array being converted */ const char *bytesPtr; /* Pointer to the first byte of the array */ const char *stopOnError = NULL; - int result; + size_t result; if (objc == 2) { encoding = Tcl_GetEncoding(interp, NULL); @@ -563,7 +563,9 @@ EncodingConvertfromObjCmd( data = objv[2]; if (objc > 3) { stopOnError = Tcl_GetString(objv[3]); - if (stopOnError[0] != '-' || stopOnError[1] != 's' + if (!stopOnError[0]) { + stopOnError = NULL; + } else if (stopOnError[0] != '-' || stopOnError[1] != 's' || strncmp(stopOnError, "-stoponerror", strlen(stopOnError))) { goto encConvFromError; } @@ -578,10 +580,11 @@ EncodingConvertfromObjCmd( * Convert the string into a byte array in 'ds' */ bytesPtr = (char *) Tcl_GetByteArrayFromObj(data, &length); - result = Tcl_ExternalToUtfDStringEx(encoding, bytesPtr, length, &ds, - stopOnError ? TCL_ENCODING_STOPONERROR : 0); - if (stopOnError && (result != TCL_OK)) { - Tcl_SetObjResult(interp, Tcl_ObjPrintf("encoding error after producing %d characters", Tcl_DStringLength(&ds))); + result = Tcl_ExternalToUtfDStringEx(encoding, bytesPtr, length, + stopOnError ? TCL_ENCODING_STOPONERROR : 0, &ds); + if (stopOnError && (result != (size_t)-1)) { + Tcl_SetObjResult(interp, Tcl_ObjPrintf("encoding error after reading %" + TCL_LL_MODIFIER "u byte%s", (long long)result, (result != 1)?"s":"")); Tcl_DStringFree(&ds); return TCL_ERROR; } @@ -628,7 +631,7 @@ EncodingConverttoObjCmd( Tcl_Encoding encoding; /* Encoding to use */ int length; /* Length of the string being converted */ const char *stringPtr; /* Pointer to the first byte of the string */ - int result; + size_t result; const char *stopOnError = NULL; /* TODO - ADJUST OBJ INDICES WHEN ENSEMBLIFYING THIS */ @@ -643,7 +646,9 @@ EncodingConverttoObjCmd( data = objv[2]; if (objc > 3) { stopOnError = Tcl_GetString(objv[3]); - if (stopOnError[0] != '-' || stopOnError[1] != 's' + if (!stopOnError[0]) { + stopOnError = NULL; + } else if (stopOnError[0] != '-' || stopOnError[1] != 's' || strncmp(stopOnError, "-stoponerror", strlen(stopOnError))) { goto encConvToError; } @@ -659,10 +664,12 @@ EncodingConverttoObjCmd( */ stringPtr = TclGetStringFromObj(data, &length); - result = Tcl_UtfToExternalDStringEx(encoding, stringPtr, length, &ds, - stopOnError ? TCL_ENCODING_STOPONERROR : 0); - if (stopOnError && (result != TCL_OK)) { - Tcl_SetObjResult(interp, Tcl_ObjPrintf("encoding error after producing %d bytes", Tcl_DStringLength(&ds))); + result = Tcl_UtfToExternalDStringEx(encoding, stringPtr, length, + stopOnError ? TCL_ENCODING_STOPONERROR : 0, &ds); + if (stopOnError && (result != (size_t)-1)) { + result = Tcl_NumUtfChars(stringPtr, result); + Tcl_SetObjResult(interp, Tcl_ObjPrintf("encoding error after reading %" + TCL_LL_MODIFIER "u character%s", (long long)result, (result != 1)?"s":"")); Tcl_DStringFree(&ds); return TCL_ERROR; } diff --git a/generic/tclDecls.h b/generic/tclDecls.h index 6ba39d5..24760f9 100644 --- a/generic/tclDecls.h +++ b/generic/tclDecls.h @@ -1938,13 +1938,13 @@ EXTERN const char * Tcl_UtfNext(const char *src); /* 656 */ EXTERN const char * Tcl_UtfPrev(const char *src, const char *start); /* 657 */ -EXTERN int Tcl_ExternalToUtfDStringEx(Tcl_Encoding encoding, - const char *src, int srcLen, - Tcl_DString *dsPtr, int flags); +EXTERN size_t Tcl_ExternalToUtfDStringEx(Tcl_Encoding encoding, + const char *src, int srcLen, int flags, + Tcl_DString *dsPtr); /* 658 */ -EXTERN int Tcl_UtfToExternalDStringEx(Tcl_Encoding encoding, - const char *src, int srcLen, - Tcl_DString *dsPtr, int flags); +EXTERN size_t Tcl_UtfToExternalDStringEx(Tcl_Encoding encoding, + const char *src, int srcLen, int flags, + Tcl_DString *dsPtr); typedef struct { const struct TclPlatStubs *tclPlatStubs; @@ -2637,8 +2637,8 @@ typedef struct TclStubs { int (*tcl_UtfCharComplete) (const char *src, int length); /* 654 */ const char * (*tcl_UtfNext) (const char *src); /* 655 */ const char * (*tcl_UtfPrev) (const char *src, const char *start); /* 656 */ - int (*tcl_ExternalToUtfDStringEx) (Tcl_Encoding encoding, const char *src, int srcLen, Tcl_DString *dsPtr, int flags); /* 657 */ - int (*tcl_UtfToExternalDStringEx) (Tcl_Encoding encoding, const char *src, int srcLen, Tcl_DString *dsPtr, int flags); /* 658 */ + size_t (*tcl_ExternalToUtfDStringEx) (Tcl_Encoding encoding, const char *src, int srcLen, int flags, Tcl_DString *dsPtr); /* 657 */ + size_t (*tcl_UtfToExternalDStringEx) (Tcl_Encoding encoding, const char *src, int srcLen, int flags, Tcl_DString *dsPtr); /* 658 */ } TclStubs; extern const TclStubs *tclStubsPtr; diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 72f7690..0bce51b 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -511,7 +511,6 @@ FillEncodingFileMap(void) */ /* Those flags must not conflict with other TCL_ENCODING_* flags in tcl.h */ -#define TCL_ENCODING_MODIFIED 0x20 /* Converting NULL bytes to 0xC0 0x80 */ #define TCL_ENCODING_LE 0x80 /* Little-endian encoding, for ucs-2/utf-16 only */ void @@ -1117,26 +1116,27 @@ Tcl_ExternalToUtfDString( Tcl_DString *dstPtr) /* Uninitialized or free DString in which the * converted string is stored. */ { - Tcl_ExternalToUtfDStringEx(encoding, src, srcLen, dstPtr, 0); + Tcl_ExternalToUtfDStringEx(encoding, src, srcLen, 0, dstPtr); return Tcl_DStringValue(dstPtr); } -int +size_t Tcl_ExternalToUtfDStringEx( Tcl_Encoding encoding, /* The encoding for the source string, or NULL * for the default system encoding. */ const char *src, /* Source string in specified encoding. */ int srcLen, /* Source string length in bytes, or < 0 for * encoding-specific string length. */ - Tcl_DString *dstPtr, /* Uninitialized or free DString in which the + int flags, /* Conversion control flags. */ + Tcl_DString *dstPtr) /* Uninitialized or free DString in which the * converted string is stored. */ - int flags) /* Conversion control flags. */ { char *dst; Tcl_EncodingState state; const Encoding *encodingPtr; int dstLen, result, soFar, srcRead, dstWrote, dstChars; + const char *srcStart = src; Tcl_DStringInit(dstPtr); dst = Tcl_DStringValue(dstPtr); @@ -1160,13 +1160,12 @@ Tcl_ExternalToUtfDStringEx( flags, &state, dst, dstLen, &srcRead, &dstWrote, &dstChars); soFar = dst + dstWrote - Tcl_DStringValue(dstPtr); + src += srcRead; if (result != TCL_CONVERT_NOSPACE) { Tcl_DStringSetLength(dstPtr, soFar); - return result; + return (result == TCL_OK) ? (size_t)-1 : (size_t)(src - srcStart); } - flags &= ~TCL_ENCODING_START; - src += srcRead; srcLen -= srcRead; if (Tcl_DStringLength(dstPtr) == 0) { Tcl_DStringSetLength(dstPtr, dstLen); @@ -1321,25 +1320,26 @@ Tcl_UtfToExternalDString( Tcl_DString *dstPtr) /* Uninitialized or free DString in which the * converted string is stored. */ { - Tcl_UtfToExternalDStringEx(encoding, src, srcLen, dstPtr, 0); + Tcl_UtfToExternalDStringEx(encoding, src, srcLen, 0, dstPtr); return Tcl_DStringValue(dstPtr); } -int +size_t Tcl_UtfToExternalDStringEx( Tcl_Encoding encoding, /* The encoding for the converted string, or * NULL for the default system encoding. */ const char *src, /* Source string in UTF-8. */ int srcLen, /* Source string length in bytes, or < 0 for * strlen(). */ - Tcl_DString *dstPtr, /* Uninitialized or free DString in which the + int flags, /* Conversion control flags. */ + Tcl_DString *dstPtr) /* Uninitialized or free DString in which the * converted string is stored. */ - int flags) /* Conversion control flags. */ { char *dst; Tcl_EncodingState state; const Encoding *encodingPtr; int dstLen, result, soFar, srcRead, dstWrote, dstChars; + const char *srcStart = src; Tcl_DStringInit(dstPtr); dst = Tcl_DStringValue(dstPtr); @@ -1355,23 +1355,23 @@ Tcl_UtfToExternalDStringEx( } else if (srcLen < 0) { srcLen = strlen(src); } - flags |= TCL_ENCODING_START | TCL_ENCODING_END | TCL_ENCODING_EXTERNAL; + flags |= TCL_ENCODING_START | TCL_ENCODING_END; while (1) { result = encodingPtr->fromUtfProc(encodingPtr->clientData, src, srcLen, flags, &state, dst, dstLen, &srcRead, &dstWrote, &dstChars); soFar = dst + dstWrote - Tcl_DStringValue(dstPtr); + src += srcRead; if (result != TCL_CONVERT_NOSPACE) { if (encodingPtr->nullSize == 2) { Tcl_DStringSetLength(dstPtr, soFar + 1); } Tcl_DStringSetLength(dstPtr, soFar); - return result; + return (result == TCL_OK) ? (size_t)-1 : (size_t)(src - srcStart); } flags &= ~TCL_ENCODING_START; - src += srcRead; srcLen -= srcRead; if (Tcl_DStringLength(dstPtr) == 0) { Tcl_DStringSetLength(dstPtr, dstLen); diff --git a/tests/encoding.test b/tests/encoding.test index 76e2ca4..63f0fa6 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -585,16 +585,16 @@ test encoding-24.12 {Parse valid or invalid utf-8} { } 1 test encoding-24.13 {Parse valid or invalid utf-8} -body { encoding convertfrom utf-8 "\xC0\x81" -stoponerror -} -returnCodes 1 -result {encoding error after producing 0 characters} +} -returnCodes 1 -result {encoding error after reading 0 bytes} test encoding-24.14 {Parse valid or invalid utf-8} -body { encoding convertfrom utf-8 "\xC1\xBF" -stoponerror -} -returnCodes 1 -result {encoding error after producing 0 characters} +} -returnCodes 1 -result {encoding error after reading 0 bytes} test encoding-24.15 {Parse valid or invalid utf-8} { string length [encoding convertfrom utf-8 "\xC2\x80" -stoponerror] } 1 test encoding-24.16 {Parse valid or invalid utf-8} -body { encoding convertfrom utf-8 "Z\xE0\x80" -stoponerror -} -returnCodes 1 -result {encoding error after producing 1 characters} +} -returnCodes 1 -result {encoding error after reading 1 byte} file delete [file join [temporaryDirectory] iso2022.txt] -- cgit v0.12 From 41533dc84a21444a1885476d2b4ac780b6581a44 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Tue, 30 Mar 2021 11:54:50 +0000 Subject: add testcase for "encoding convertto". Move stub table one positions --- generic/tcl.decls | 4 ++-- generic/tcl.h | 2 +- generic/tclCmdAH.c | 2 +- generic/tclDecls.h | 15 +++++++++------ generic/tclStubInit.c | 5 +++-- tests/encoding.test | 9 ++++++--- 6 files changed, 22 insertions(+), 15 deletions(-) diff --git a/generic/tcl.decls b/generic/tcl.decls index 8cd5bc9..0dfa415 100644 --- a/generic/tcl.decls +++ b/generic/tcl.decls @@ -2424,11 +2424,11 @@ declare 656 { const char *Tcl_UtfPrev(const char *src, const char *start) } -declare 657 { +declare 658 { size_t Tcl_ExternalToUtfDStringEx(Tcl_Encoding encoding, const char *src, int srcLen, int flags, Tcl_DString *dsPtr) } -declare 658 { +declare 659 { size_t Tcl_UtfToExternalDStringEx(Tcl_Encoding encoding, const char *src, int srcLen, int flags, Tcl_DString *dsPtr) } diff --git a/generic/tcl.h b/generic/tcl.h index f783f4f..e1b6066 100644 --- a/generic/tcl.h +++ b/generic/tcl.h @@ -2080,7 +2080,7 @@ typedef struct Tcl_EncodingType { #define TCL_ENCODING_STOPONERROR 0x04 #define TCL_ENCODING_NO_TERMINATE 0x08 #define TCL_ENCODING_CHAR_LIMIT 0x10 -#define TCL_ENCODING_MODIFIED 0x20 +#define TCL_ENCODING_MODIFIED 0x20 /* * The following definitions are the error codes returned by the conversion diff --git a/generic/tclCmdAH.c b/generic/tclCmdAH.c index cd77e06..df80d3c 100644 --- a/generic/tclCmdAH.c +++ b/generic/tclCmdAH.c @@ -583,7 +583,7 @@ EncodingConvertfromObjCmd( result = Tcl_ExternalToUtfDStringEx(encoding, bytesPtr, length, stopOnError ? TCL_ENCODING_STOPONERROR : 0, &ds); if (stopOnError && (result != (size_t)-1)) { - Tcl_SetObjResult(interp, Tcl_ObjPrintf("encoding error after reading %" + Tcl_SetObjResult(interp, Tcl_ObjPrintf("decoding error after reading %" TCL_LL_MODIFIER "u byte%s", (long long)result, (result != 1)?"s":"")); Tcl_DStringFree(&ds); return TCL_ERROR; diff --git a/generic/tclDecls.h b/generic/tclDecls.h index 24760f9..6ee645d 100644 --- a/generic/tclDecls.h +++ b/generic/tclDecls.h @@ -1937,11 +1937,12 @@ EXTERN int Tcl_UtfCharComplete(const char *src, int length); EXTERN const char * Tcl_UtfNext(const char *src); /* 656 */ EXTERN const char * Tcl_UtfPrev(const char *src, const char *start); -/* 657 */ +/* Slot 657 is reserved */ +/* 658 */ EXTERN size_t Tcl_ExternalToUtfDStringEx(Tcl_Encoding encoding, const char *src, int srcLen, int flags, Tcl_DString *dsPtr); -/* 658 */ +/* 659 */ EXTERN size_t Tcl_UtfToExternalDStringEx(Tcl_Encoding encoding, const char *src, int srcLen, int flags, Tcl_DString *dsPtr); @@ -2637,8 +2638,9 @@ typedef struct TclStubs { int (*tcl_UtfCharComplete) (const char *src, int length); /* 654 */ const char * (*tcl_UtfNext) (const char *src); /* 655 */ const char * (*tcl_UtfPrev) (const char *src, const char *start); /* 656 */ - size_t (*tcl_ExternalToUtfDStringEx) (Tcl_Encoding encoding, const char *src, int srcLen, int flags, Tcl_DString *dsPtr); /* 657 */ - size_t (*tcl_UtfToExternalDStringEx) (Tcl_Encoding encoding, const char *src, int srcLen, int flags, Tcl_DString *dsPtr); /* 658 */ + void (*reserved657)(void); + size_t (*tcl_ExternalToUtfDStringEx) (Tcl_Encoding encoding, const char *src, int srcLen, int flags, Tcl_DString *dsPtr); /* 658 */ + size_t (*tcl_UtfToExternalDStringEx) (Tcl_Encoding encoding, const char *src, int srcLen, int flags, Tcl_DString *dsPtr); /* 659 */ } TclStubs; extern const TclStubs *tclStubsPtr; @@ -3981,10 +3983,11 @@ extern const TclStubs *tclStubsPtr; (tclStubsPtr->tcl_UtfNext) /* 655 */ #define Tcl_UtfPrev \ (tclStubsPtr->tcl_UtfPrev) /* 656 */ +/* Slot 657 is reserved */ #define Tcl_ExternalToUtfDStringEx \ - (tclStubsPtr->tcl_ExternalToUtfDStringEx) /* 657 */ + (tclStubsPtr->tcl_ExternalToUtfDStringEx) /* 658 */ #define Tcl_UtfToExternalDStringEx \ - (tclStubsPtr->tcl_UtfToExternalDStringEx) /* 658 */ + (tclStubsPtr->tcl_UtfToExternalDStringEx) /* 659 */ #endif /* defined(USE_TCL_STUBS) */ diff --git a/generic/tclStubInit.c b/generic/tclStubInit.c index 0473bb1..54ab4b6 100644 --- a/generic/tclStubInit.c +++ b/generic/tclStubInit.c @@ -1932,8 +1932,9 @@ const TclStubs tclStubs = { Tcl_UtfCharComplete, /* 654 */ Tcl_UtfNext, /* 655 */ Tcl_UtfPrev, /* 656 */ - Tcl_ExternalToUtfDStringEx, /* 657 */ - Tcl_UtfToExternalDStringEx, /* 658 */ + 0, /* 657 */ + Tcl_ExternalToUtfDStringEx, /* 658 */ + Tcl_UtfToExternalDStringEx, /* 659 */ }; /* !END!: Do not edit above this line. */ diff --git a/tests/encoding.test b/tests/encoding.test index 63f0fa6..1c12be0 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -585,16 +585,19 @@ test encoding-24.12 {Parse valid or invalid utf-8} { } 1 test encoding-24.13 {Parse valid or invalid utf-8} -body { encoding convertfrom utf-8 "\xC0\x81" -stoponerror -} -returnCodes 1 -result {encoding error after reading 0 bytes} +} -returnCodes 1 -result {decoding error after reading 0 bytes} test encoding-24.14 {Parse valid or invalid utf-8} -body { encoding convertfrom utf-8 "\xC1\xBF" -stoponerror -} -returnCodes 1 -result {encoding error after reading 0 bytes} +} -returnCodes 1 -result {decoding error after reading 0 bytes} test encoding-24.15 {Parse valid or invalid utf-8} { string length [encoding convertfrom utf-8 "\xC2\x80" -stoponerror] } 1 test encoding-24.16 {Parse valid or invalid utf-8} -body { encoding convertfrom utf-8 "Z\xE0\x80" -stoponerror -} -returnCodes 1 -result {encoding error after reading 1 byte} +} -returnCodes 1 -result {decoding error after reading 1 byte} +test encoding-24.17 {Parse valid or invalid utf-8} -constraints testbytestring -body { + encoding convertto utf-8 [testbytestring "Z\xE0\x80"] -stoponerror +} -returnCodes 1 -result {encoding error after reading 1 character} file delete [file join [temporaryDirectory] iso2022.txt] -- cgit v0.12 From 78a0992b4431f976641f3d08f63c13fab742e1b9 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Tue, 30 Mar 2021 13:35:15 +0000 Subject: Better error-messages --- generic/tclCmdAH.c | 23 ++++++++++++++++------- tests/encoding.test | 11 +++++++---- 2 files changed, 23 insertions(+), 11 deletions(-) diff --git a/generic/tclCmdAH.c b/generic/tclCmdAH.c index df80d3c..0c0a4a4 100644 --- a/generic/tclCmdAH.c +++ b/generic/tclCmdAH.c @@ -579,12 +579,19 @@ EncodingConvertfromObjCmd( /* * Convert the string into a byte array in 'ds' */ - bytesPtr = (char *) Tcl_GetByteArrayFromObj(data, &length); + if (stopOnError) { + bytesPtr = (char *) TclGetBytesFromObj(interp, data, &length); + if (bytesPtr == NULL) { + return TCL_ERROR; + } + } else { + bytesPtr = (char *) Tcl_GetByteArrayFromObj(data, &length); + } result = Tcl_ExternalToUtfDStringEx(encoding, bytesPtr, length, stopOnError ? TCL_ENCODING_STOPONERROR : 0, &ds); if (stopOnError && (result != (size_t)-1)) { - Tcl_SetObjResult(interp, Tcl_ObjPrintf("decoding error after reading %" - TCL_LL_MODIFIER "u byte%s", (long long)result, (result != 1)?"s":"")); + Tcl_SetObjResult(interp, Tcl_ObjPrintf("unexpected byte at index %" + TCL_LL_MODIFIER "u: '%c' (\\x%X)", (long long)result, UCHAR(bytesPtr[result]), UCHAR(bytesPtr[result]))); Tcl_DStringFree(&ds); return TCL_ERROR; } @@ -667,10 +674,12 @@ EncodingConverttoObjCmd( result = Tcl_UtfToExternalDStringEx(encoding, stringPtr, length, stopOnError ? TCL_ENCODING_STOPONERROR : 0, &ds); if (stopOnError && (result != (size_t)-1)) { - result = Tcl_NumUtfChars(stringPtr, result); - Tcl_SetObjResult(interp, Tcl_ObjPrintf("encoding error after reading %" - TCL_LL_MODIFIER "u character%s", (long long)result, (result != 1)?"s":"")); - Tcl_DStringFree(&ds); + size_t pos = Tcl_NumUtfChars(stringPtr, result); + int ucs4; + TclUtfToUCS4(&stringPtr[result], &ucs4); + Tcl_SetObjResult(interp, Tcl_ObjPrintf("unexpected character at index %" + TCL_LL_MODIFIER "u: '%1s' (U+%06X)", (long long)pos, &stringPtr[result], ucs4)); + Tcl_DStringFree(&ds); return TCL_ERROR; } Tcl_SetObjResult(interp, diff --git a/tests/encoding.test b/tests/encoding.test index 1c12be0..114b296 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -585,19 +585,22 @@ test encoding-24.12 {Parse valid or invalid utf-8} { } 1 test encoding-24.13 {Parse valid or invalid utf-8} -body { encoding convertfrom utf-8 "\xC0\x81" -stoponerror -} -returnCodes 1 -result {decoding error after reading 0 bytes} +} -returnCodes 1 -result {unexpected byte at index 0: 'À' (\xC0)} test encoding-24.14 {Parse valid or invalid utf-8} -body { encoding convertfrom utf-8 "\xC1\xBF" -stoponerror -} -returnCodes 1 -result {decoding error after reading 0 bytes} +} -returnCodes 1 -result {unexpected byte at index 0: 'Á' (\xC1)} test encoding-24.15 {Parse valid or invalid utf-8} { string length [encoding convertfrom utf-8 "\xC2\x80" -stoponerror] } 1 test encoding-24.16 {Parse valid or invalid utf-8} -body { encoding convertfrom utf-8 "Z\xE0\x80" -stoponerror -} -returnCodes 1 -result {decoding error after reading 1 byte} +} -returnCodes 1 -result {unexpected byte at index 1: 'à' (\xE0)} test encoding-24.17 {Parse valid or invalid utf-8} -constraints testbytestring -body { + encoding convertto utf-8 [testbytestring "Z\u4343\x80"] -stoponerror +} -returnCodes 1 -result {expected byte sequence but character 1 was '䍃€' (U+004343)} +test encoding-24.18 {Parse valid or invalid utf-8} -constraints testbytestring -body { encoding convertto utf-8 [testbytestring "Z\xE0\x80"] -stoponerror -} -returnCodes 1 -result {encoding error after reading 1 character} +} -returnCodes 1 -match glob -result {unexpected character at index 1: '*' (U+0000E0)} file delete [file join [temporaryDirectory] iso2022.txt] -- cgit v0.12 From 975d478bfaf46abfe1b34bdbd82dd0dc9556d864 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Thu, 1 Apr 2021 09:10:13 +0000 Subject: More bugfixes (and testcases showing this) --- generic/tclCmdAH.c | 2 +- generic/tclEncoding.c | 17 +++++++++++------ tests/encoding.test | 5 ++++- 3 files changed, 16 insertions(+), 8 deletions(-) diff --git a/generic/tclCmdAH.c b/generic/tclCmdAH.c index 0c0a4a4..1dfabd2 100644 --- a/generic/tclCmdAH.c +++ b/generic/tclCmdAH.c @@ -678,7 +678,7 @@ EncodingConverttoObjCmd( int ucs4; TclUtfToUCS4(&stringPtr[result], &ucs4); Tcl_SetObjResult(interp, Tcl_ObjPrintf("unexpected character at index %" - TCL_LL_MODIFIER "u: '%1s' (U+%06X)", (long long)pos, &stringPtr[result], ucs4)); + TCL_LL_MODIFIER "u: '%c' (U+%06X)", (long long)pos, ucs4, ucs4)); Tcl_DStringFree(&ds); return TCL_ERROR; } diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index d28fc8c..6cf0d76 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -2303,18 +2303,23 @@ UtfToUtfProc( * unless the user has explicitly asked to be told. */ - if (flags & TCL_ENCODING_STOPONERROR) { - result = TCL_CONVERT_MULTIBYTE; - break; + if (flags & TCL_ENCODING_MODIFIED) { + if (flags & TCL_ENCODING_STOPONERROR) { + result = TCL_CONVERT_MULTIBYTE; + break; + } + ch = UCHAR(*src++); + } else { + char chbuf[2]; + chbuf[0] = UCHAR(*src++); chbuf[1] = 0; + TclUtfToUCS4(chbuf, &ch); } - ch = UCHAR(*src); - src += 1; dst += Tcl_UniCharToUtf(ch, dst); } else { int low; const char *saveSrc = src; size_t len = TclUtfToUCS4(src, &ch); - if ((len < 2) && (ch != 0) && (flags & TCL_ENCODING_STOPONERROR)) { + if ((len < 2) && (ch != 0) && (flags & TCL_ENCODING_STOPONERROR) && (flags & TCL_ENCODING_MODIFIED)) { result = TCL_CONVERT_SYNTAX; break; } diff --git a/tests/encoding.test b/tests/encoding.test index 45b5f49..3b3f42c 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -671,8 +671,11 @@ test encoding-24.17 {Parse valid or invalid utf-8} -constraints testbytestring - } -returnCodes 1 -result {expected byte sequence but character 1 was '䍃€' (U+004343)} test encoding-24.18 {Parse valid or invalid utf-8} -constraints testbytestring -body { encoding convertto utf-8 [testbytestring "Z\xE0\x80"] -stoponerror -} -returnCodes 1 -match glob -result {unexpected character at index 1: '*' (U+0000E0)} +} -result "Z\xC3\xA0\xE2\x82\xAC" test encoding-24.19 {Parse valid or invalid utf-8} -constraints testbytestring -body { + encoding convertto utf-8 [testbytestring "Z\xE0\x80xxxxxx"] -stoponerror +} -result "Z\xC3\xA0\xE2\x82\xACxxxxxx" +test encoding-24.20 {Parse valid or invalid utf-8} -constraints testbytestring -body { encoding convertto utf-8 "ZX\uD800" -stoponerror } -returnCodes 1 -match glob -result "unexpected character at index 2: '\uD800' (U+00D800)" -- cgit v0.12 From 6941f99c78c730b92f232078e1aa3bad1b84ae1c Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Thu, 1 Apr 2021 13:50:21 +0000 Subject: Add experimental "-nothrow" option to encoding convertfrom|convertto. If compiled with -DTCL_NO_DEPRECATED (meant for Tcl 9.0), -stoponerror is the default for all IO --- generic/tcl.h | 13 +++++++++++-- generic/tclCmdAH.c | 36 ++++++++++++++++++++++++++++-------- generic/tclEncoding.c | 26 ++++++++++++++++---------- tests/chanio.test | 8 +++++--- tests/cmdAH.test | 4 ++-- tests/encoding.test | 40 ++++++++++++++++++++-------------------- tests/http.test | 4 +++- tests/io.test | 11 ++++++----- tests/main.test | 4 +++- tests/safe.test | 8 ++++---- tests/source.test | 4 +++- 11 files changed, 101 insertions(+), 57 deletions(-) diff --git a/generic/tcl.h b/generic/tcl.h index dfb4c3a..f6c6730 100644 --- a/generic/tcl.h +++ b/generic/tcl.h @@ -2056,10 +2056,10 @@ typedef struct Tcl_EncodingType { * encountering an invalid byte sequence or a * source character that has no mapping in the * target encoding. If clear, the converter - * substitues the problematic character(s) with + * substitutes the problematic character(s) with * one or more "close" characters in the * destination buffer and then continues to - * convert the source. + * convert the source. Only for Tcl 8.x. * TCL_ENCODING_NO_TERMINATE - If set, Tcl_ExternalToUtf does not append a * terminating NUL byte. Since it does not need * an extra byte for a terminating NUL, it fills @@ -2078,6 +2078,14 @@ typedef struct Tcl_EncodingType { * 0x00. Only valid for "utf-8", "wtf-8 and "cesu-8". * This flag is implicit for external -> internal conversions, * optional for internal -> external conversions. + * TCL_ENCODING_NO_THROW - If set, the converter + * substitutes the problematic character(s) with + * one or more "close" characters in the + * destination buffer and then continues to + * convert the source. If clear, the converter returns + * immediately upon encountering an invalid byte sequence + * or a source character that has no mapping in the + * target encoding. Only for Tcl 9.x. */ #define TCL_ENCODING_START 0x01 @@ -2086,6 +2094,7 @@ typedef struct Tcl_EncodingType { #define TCL_ENCODING_NO_TERMINATE 0x08 #define TCL_ENCODING_CHAR_LIMIT 0x10 #define TCL_ENCODING_MODIFIED 0x20 +#define TCL_ENCODING_NO_THROW 0x40 /* * The following definitions are the error codes returned by the conversion diff --git a/generic/tclCmdAH.c b/generic/tclCmdAH.c index 1dfabd2..ca8e939 100644 --- a/generic/tclCmdAH.c +++ b/generic/tclCmdAH.c @@ -564,15 +564,25 @@ EncodingConvertfromObjCmd( if (objc > 3) { stopOnError = Tcl_GetString(objv[3]); if (!stopOnError[0]) { +#if TCL_MAJOR_VERSION < 9 && !defined(TCL_NO_DEPRECATED) stopOnError = NULL; - } else if (stopOnError[0] != '-' || stopOnError[1] != 's' - || strncmp(stopOnError, "-stoponerror", strlen(stopOnError))) { +#endif + } else if (stopOnError[0] == '-' && stopOnError[1] == 'n' + && !strncmp(stopOnError, "-nothrow", strlen(stopOnError))) { + stopOnError = NULL; + } else if (stopOnError[0] == '-' && stopOnError[1] == 's' + && !strncmp(stopOnError, "-stoponerror", strlen(stopOnError))) { + } else { goto encConvFromError; } +#if TCL_MAJOR_VERSION > 8 || defined(TCL_NO_DEPRECATED) + } else { + stopOnError = ""; +#endif } } else { encConvFromError: - Tcl_WrongNumArgs(interp, 1, objv, "?encoding? data ?-stoponerror?"); + Tcl_WrongNumArgs(interp, 1, objv, "?encoding? data ?-stoponerror|-nothrow?"); return TCL_ERROR; } @@ -588,7 +598,7 @@ EncodingConvertfromObjCmd( bytesPtr = (char *) Tcl_GetByteArrayFromObj(data, &length); } result = Tcl_ExternalToUtfDStringEx(encoding, bytesPtr, length, - stopOnError ? TCL_ENCODING_STOPONERROR : 0, &ds); + stopOnError ? TCL_ENCODING_STOPONERROR : TCL_ENCODING_NO_THROW, &ds); if (stopOnError && (result != (size_t)-1)) { Tcl_SetObjResult(interp, Tcl_ObjPrintf("unexpected byte at index %" TCL_LL_MODIFIER "u: '%c' (\\x%X)", (long long)result, UCHAR(bytesPtr[result]), UCHAR(bytesPtr[result]))); @@ -654,15 +664,25 @@ EncodingConverttoObjCmd( if (objc > 3) { stopOnError = Tcl_GetString(objv[3]); if (!stopOnError[0]) { +#if TCL_MAJOR_VERSION < 9 && !defined(TCL_NO_DEPRECATED) stopOnError = NULL; - } else if (stopOnError[0] != '-' || stopOnError[1] != 's' - || strncmp(stopOnError, "-stoponerror", strlen(stopOnError))) { +#endif + } else if (stopOnError[0] == '-' && stopOnError[1] == 'n' + && !strncmp(stopOnError, "-nothrow", strlen(stopOnError))) { + stopOnError = NULL; + } else if (stopOnError[0] == '-' && stopOnError[1] == 's' + && !strncmp(stopOnError, "-stoponerror", strlen(stopOnError))) { + } else { goto encConvToError; } +#if TCL_MAJOR_VERSION > 8 || defined(TCL_NO_DEPRECATED) + } else { + stopOnError = ""; +#endif } } else { encConvToError: - Tcl_WrongNumArgs(interp, 1, objv, "?encoding? data ?-stoponerror?"); + Tcl_WrongNumArgs(interp, 1, objv, "?encoding? data ?-stoponerror|-nothrow?"); return TCL_ERROR; } @@ -672,7 +692,7 @@ EncodingConverttoObjCmd( stringPtr = TclGetStringFromObj(data, &length); result = Tcl_UtfToExternalDStringEx(encoding, stringPtr, length, - stopOnError ? TCL_ENCODING_STOPONERROR : 0, &ds); + stopOnError ? TCL_ENCODING_STOPONERROR : TCL_ENCODING_NO_THROW, &ds); if (stopOnError && (result != (size_t)-1)) { size_t pos = Tcl_NumUtfChars(stringPtr, result); int ucs4; diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index b7c0a4f..76dbe7f 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -2223,6 +2223,12 @@ BinaryProc( *------------------------------------------------------------------------- */ +#if TCL_MAJOR_VERSION > 8 || defined(TCL_NO_DEPRECATED) +# define STOPONERROR !(flags & TCL_ENCODING_NO_THROW) +#else +# define STOPONERROR (flags & TCL_ENCODING_STOPONERROR) +#endif + static int UtfToUtfProc( ClientData clientData, /* additional flags, e.g. TCL_ENCODING_MODIFIED */ @@ -2305,7 +2311,7 @@ UtfToUtfProc( */ if (flags & TCL_ENCODING_MODIFIED) { - if (flags & TCL_ENCODING_STOPONERROR) { + if (STOPONERROR) { result = TCL_CONVERT_MULTIBYTE; break; } @@ -2320,7 +2326,7 @@ UtfToUtfProc( int low; const char *saveSrc = src; size_t len = TclUtfToUCS4(src, &ch); - if ((len < 2) && (ch != 0) && (flags & TCL_ENCODING_STOPONERROR) + if ((len < 2) && (ch != 0) && STOPONERROR && (flags & TCL_ENCODING_MODIFIED)) { result = TCL_CONVERT_SYNTAX; break; @@ -2346,7 +2352,7 @@ UtfToUtfProc( if (((low & ~0x3FF) != 0xDC00) || (ch & 0x400)) { if (!(flags & TCL_ENCODING_WTF)) { - if (flags & TCL_ENCODING_STOPONERROR) { + if (STOPONERROR) { result = TCL_CONVERT_UNKNOWN; src = saveSrc; break; @@ -2365,7 +2371,7 @@ UtfToUtfProc( dst += Tcl_UniCharToUtf(ch, dst); ch = low; } else if (!(flags & TCL_ENCODING_WTF) && !Tcl_UniCharIsUnicode(ch)) { - if (flags & TCL_ENCODING_STOPONERROR) { + if (STOPONERROR) { result = TCL_CONVERT_UNKNOWN; src = saveSrc; break; @@ -2561,7 +2567,7 @@ UtfToUtf16Proc( } len = TclUtfToUCS4(src, &ch); if (!(flags & TCL_ENCODING_WTF) && !Tcl_UniCharIsUnicode(ch)) { - if (flags & TCL_ENCODING_STOPONERROR) { + if (STOPONERROR) { result = TCL_CONVERT_UNKNOWN; break; } @@ -2781,7 +2787,7 @@ TableToUtfProc( ch = pageZero[byte]; } if ((ch == 0) && (byte != 0)) { - if (flags & TCL_ENCODING_STOPONERROR) { + if (STOPONERROR) { result = TCL_CONVERT_SYNTAX; break; } @@ -2901,7 +2907,7 @@ TableFromUtfProc( word = fromUnicode[(ch >> 8)][ch & 0xFF]; if ((word == 0) && (ch != 0)) { - if (flags & TCL_ENCODING_STOPONERROR) { + if (STOPONERROR) { result = TCL_CONVERT_UNKNOWN; break; } @@ -3089,7 +3095,7 @@ Iso88591FromUtfProc( || ((ch >= 0xD800) && (len < 3)) #endif ) { - if (flags & TCL_ENCODING_STOPONERROR) { + if (STOPONERROR) { result = TCL_CONVERT_UNKNOWN; break; } @@ -3316,7 +3322,7 @@ EscapeToUtfProc( if ((checked == dataPtr->numSubTables + 2) || (flags & TCL_ENCODING_END)) { - if ((flags & TCL_ENCODING_STOPONERROR) == 0) { + if (!STOPONERROR) { /* * Skip the unknown escape sequence. */ @@ -3491,7 +3497,7 @@ EscapeFromUtfProc( if (word == 0) { state = oldState; - if (flags & TCL_ENCODING_STOPONERROR) { + if (STOPONERROR) { result = TCL_CONVERT_UNKNOWN; break; } diff --git a/tests/chanio.test b/tests/chanio.test index 8dfefb7..64d67d1 100644 --- a/tests/chanio.test +++ b/tests/chanio.test @@ -18,6 +18,8 @@ if {"::tcltest" ni [namespace children]} { namespace import -force ::tcltest::* } +testConstraint nodep [info exists tcl_precision] + namespace eval ::tcl::test::io { if {"::tcltest" ni [namespace children]} { @@ -248,7 +250,7 @@ test chan-io-3.3 {WriteChars: compatibility with WriteBytes: flush on line} -bod } -cleanup { chan close $f } -result "\r\n12" -test chan-io-3.4 {WriteChars: loop over stage buffer} { +test chan-io-3.4 {WriteChars: loop over stage buffer} nodep { # stage buffer maps to more than can be queued at once. set f [open $path(test1) w] chan configure $f -encoding jis0208 -buffersize 16 @@ -257,7 +259,7 @@ test chan-io-3.4 {WriteChars: loop over stage buffer} { chan close $f lappend x [contents $path(test1)] } [list "!)!)!)!)!)!)!)!)" "!)!)!)!)!)!)!)!)!)!)!)!)!)!)!)"] -test chan-io-3.5 {WriteChars: saved != 0} { +test chan-io-3.5 {WriteChars: saved != 0} nodep { # Bytes produced by UtfToExternal from end of last channel buffer had to # be moved to beginning of next channel buffer to preserve requested # buffersize. @@ -284,7 +286,7 @@ test chan-io-3.6 {WriteChars: (stageRead + dstWrote == 0)} { chan close $f lappend x [contents $path(test1)] } [list "12345678901234\x82\x60" "12345678901234\x82\x60\x82\x61"] -test chan-io-3.7 {WriteChars: (bufPtr->nextAdded > bufPtr->length)} { +test chan-io-3.7 {WriteChars: (bufPtr->nextAdded > bufPtr->length)} nodep { # When translating UTF-8 to external, the produced bytes went past end of # the channel buffer. This is done on purpose - we then truncate the bytes # at the end of the partial character to preserve the requested blocksize diff --git a/tests/cmdAH.test b/tests/cmdAH.test index f60068d..e9973a9 100644 --- a/tests/cmdAH.test +++ b/tests/cmdAH.test @@ -178,7 +178,7 @@ test cmdAH-4.2 {Tcl_EncodingObjCmd} -returnCodes error -body { } -result {unknown or ambiguous subcommand "foo": must be convertfrom, convertto, dirs, names, or system} test cmdAH-4.3 {Tcl_EncodingObjCmd} -returnCodes error -body { encoding convertto -} -result {wrong # args: should be "encoding convertto ?encoding? data ?-stoponerror?"} +} -result {wrong # args: should be "encoding convertto ?encoding? data ?-stoponerror|-nothrow?"} test cmdAH-4.4 {Tcl_EncodingObjCmd} -returnCodes error -body { encoding convertto foo bar } -result {unknown encoding "foo"} @@ -200,7 +200,7 @@ test cmdAH-4.6 {Tcl_EncodingObjCmd} -setup { } -result 8C test cmdAH-4.7 {Tcl_EncodingObjCmd} -returnCodes error -body { encoding convertfrom -} -result {wrong # args: should be "encoding convertfrom ?encoding? data ?-stoponerror?"} +} -result {wrong # args: should be "encoding convertfrom ?encoding? data ?-stoponerror|-nothrow?"} test cmdAH-4.8 {Tcl_EncodingObjCmd} -returnCodes error -body { encoding convertfrom foo bar } -result {unknown encoding "foo"} diff --git a/tests/encoding.test b/tests/encoding.test index 3b3f42c..0a5417e 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -291,7 +291,7 @@ test encoding-11.9 {encoding: extended Unicode UTF-16} { test encoding-12.1 {LoadTableEncoding: normal encoding} { set x [encoding convertto iso8859-3 Ġ] - append x [encoding convertto iso8859-3 Õ] + append x [encoding convertto iso8859-3 Õ -nothrow] append x [encoding convertfrom iso8859-3 Õ] } "Õ?Ġ" test encoding-12.2 {LoadTableEncoding: single-byte encoding} { @@ -400,7 +400,7 @@ test encoding-15.15 {UtfToUtfProc low surrogate character output} { } {1 3 eda882} test encoding-15.16 {UtfToUtfProc: Invalid 4-byte UTF-8, see [ed29806ba]} { set x \xF0\xA0\xA1\xC2 - set y [encoding convertfrom utf-8 \xF0\xA0\xA1\xC2] + set y [encoding convertfrom utf-8 \xF0\xA0\xA1\xC2 -nothrow] list [string length $x] $y } "4 \xF0\xA0\xA1\xC2" test encoding-15.17 {UtfToUtfProc emoji character output} { @@ -411,61 +411,61 @@ test encoding-15.17 {UtfToUtfProc emoji character output} { } {4 f09f9882} test encoding-15.18 {UtfToUtfProc emoji character output} { set x \uDE02\uD83D\uDE02\uD83D - set y [encoding convertto utf-8 \uDE02\uD83D\uDE02\uD83D] + set y [encoding convertto utf-8 \uDE02\uD83D\uDE02\uD83D -nothrow] binary scan $y H* z list [string length $y] $z } {10 efbfbdf09f9882efbfbd} test encoding-15.19 {UtfToUtfProc emoji character output} { set x \uDE02\uD83D\uD83D - set y [encoding convertto utf-8 \uDE02\uD83D\uD83D] + set y [encoding convertto utf-8 \uDE02\uD83D\uD83D -nothrow] binary scan $y H* z list [string length $x] [string length $y] $z } {3 9 efbfbdefbfbdefbfbd} test encoding-15.20 {UtfToUtfProc emoji character output} { set x \uDE02\uD83D\xE9 - set y [encoding convertto utf-8 \uDE02\uD83D\xE9] + set y [encoding convertto utf-8 \uDE02\uD83D\xE9 -nothrow] binary scan $y H* z list [string length $x] [string length $y] $z } {3 8 efbfbdefbfbdc3a9} test encoding-15.21 {UtfToUtfProc emoji character output} { set x \uDE02\uD83DX - set y [encoding convertto utf-8 \uDE02\uD83DX] + set y [encoding convertto utf-8 \uDE02\uD83DX -nothrow] binary scan $y H* z list [string length $x] [string length $y] $z } {3 7 efbfbdefbfbd58} test encoding-15.22 {UtfToUtfProc high surrogate character output} { set x \uDE02\xE9 - set y [encoding convertto utf-8 \uDE02\xE9] + set y [encoding convertto utf-8 \uDE02\xE9 -nothrow] binary scan $y H* z list [string length $x] [string length $y] $z } {2 5 efbfbdc3a9} test encoding-15.23 {UtfToUtfProc low surrogate character output} { set x \uDA02\xE9 - set y [encoding convertto utf-8 \uDA02\xE9] + set y [encoding convertto utf-8 \uDA02\xE9 -nothrow] binary scan $y H* z list [string length $x] [string length $y] $z } {2 5 efbfbdc3a9} test encoding-15.24 {UtfToUtfProc high surrogate character output} { set x \uDE02Y - set y [encoding convertto utf-8 \uDE02Y] + set y [encoding convertto utf-8 \uDE02Y -nothrow] binary scan $y H* z list [string length $x] [string length $y] $z } {2 4 efbfbd59} test encoding-15.25 {UtfToUtfProc low surrogate character output} { set x \uDA02Y - set y [encoding convertto utf-8 \uDA02Y] + set y [encoding convertto utf-8 \uDA02Y -nothrow] binary scan $y H* z list [string length $x] [string length $y] $z } {2 4 efbfbd59} test encoding-15.26 {UtfToUtfProc high surrogate character output} { set x \uDE02 - set y [encoding convertto utf-8 \uDE02] + set y [encoding convertto utf-8 \uDE02 -nothrow] binary scan $y H* z list [string length $x] [string length $y] $z } {1 3 efbfbd} test encoding-15.27 {UtfToUtfProc low surrogate character output} { set x \uDA02 - set y [encoding convertto utf-8 \uDA02] + set y [encoding convertto utf-8 \uDA02 -nothrow] binary scan $y H* z list [string length $x] [string length $y] $z } {1 3 efbfbd} @@ -509,10 +509,10 @@ test encoding-17.4 {UtfToUcs2Proc} -body { encoding convertfrom utf-16 [encoding convertto ucs-2 "\U460DC"] } -result "\uFFFD" test encoding-17.5 {UtfToUtf16Proc} -body { - encoding convertto utf-16be "\uDCDC" + encoding convertto utf-16be "\uDCDC" -nothrow } -result "\xFF\xFD" test encoding-17.6 {UtfToUtf16Proc} -body { - encoding convertto utf-16le "\uD8D8" + encoding convertto utf-16le "\uD8D8" -nothrow } -result "\xFD\xFF" test encoding-18.1 {TableToUtfProc} { @@ -631,25 +631,25 @@ test encoding-24.4 {Parse valid or invalid utf-8} { string length [encoding convertfrom utf-8 "\xC0\x80"] } 1 test encoding-24.5 {Parse valid or invalid utf-8} { - string length [encoding convertfrom utf-8 "\xC0\x81"] + string length [encoding convertfrom utf-8 "\xC0\x81" -nothrow] } 2 test encoding-24.6 {Parse valid or invalid utf-8} { - string length [encoding convertfrom utf-8 "\xC1\xBF"] + string length [encoding convertfrom utf-8 "\xC1\xBF" -nothrow] } 2 test encoding-24.7 {Parse valid or invalid utf-8} { string length [encoding convertfrom utf-8 "\xC2\x80"] } 1 test encoding-24.8 {Parse valid or invalid utf-8} { - string length [encoding convertfrom utf-8 "\xE0\x80\x80"] + string length [encoding convertfrom utf-8 "\xE0\x80\x80" -nothrow] } 3 test encoding-24.9 {Parse valid or invalid utf-8} { - string length [encoding convertfrom utf-8 "\xE0\x9F\xBF"] + string length [encoding convertfrom utf-8 "\xE0\x9F\xBF" -nothrow] } 3 test encoding-24.10 {Parse valid or invalid utf-8} { string length [encoding convertfrom utf-8 "\xE0\xA0\x80"] } 1 test encoding-24.11 {Parse valid or invalid utf-8} { - string length [encoding convertfrom utf-8 "\xEF\xBF\xBF"] + string length [encoding convertfrom utf-8 "\xEF\xBF\xBF" -nothrow] } 1 test encoding-24.12 {Parse valid or invalid utf-8} { string length [encoding convertfrom utf-8 "\xC0\x80" -stoponerror] @@ -833,7 +833,7 @@ test encoding-28.0 {all encodings load} -body { set string hello foreach name [encoding names] { incr count - encoding convertto $name $string + encoding convertto $name $string -nothrow # discard the cached internal representation of Tcl_Encoding # Unfortunately, without this, encoding 2-1 fails. diff --git a/tests/http.test b/tests/http.test index 2fd5af4..1275984 100644 --- a/tests/http.test +++ b/tests/http.test @@ -31,6 +31,8 @@ if {[catch {package require http 2} version]} { } } +testConstraint nodep [info exists tcl_precision] + proc bgerror {args} { global errorInfo puts stderr "http.test bgerror" @@ -661,7 +663,7 @@ test http-7.3 {http::formatQuery} -setup { } -cleanup { http::config -urlencoding $enc } -result "can't read \"formMap(∈)\": no such element in array" -test http-7.4 {http::formatQuery} -setup { +test http-7.4 {http::formatQuery} -constraints nodep -setup { set enc [http::config -urlencoding] } -body { # this would be reverting to http <=2.4 behavior w/o errors diff --git a/tests/io.test b/tests/io.test index e0a2389..329d041 100644 --- a/tests/io.test +++ b/tests/io.test @@ -48,6 +48,7 @@ testConstraint testservicemode [llength [info commands testservicemode]] testConstraint notWinCI [expr { $::tcl_platform(platform) ne "windows" || ![info exists ::env(CI)]}] testConstraint notOSX [expr {$::tcl_platform(os) ne "Darwin"}] +testConstraint nodep [info exists tcl_precision] # You need a *very* special environment to do some tests. In # particular, many file systems do not support large-files... @@ -268,7 +269,7 @@ test io-3.3 {WriteChars: compatibility with WriteBytes: flush on line} { close $f set x } "\r\n12" -test io-3.4 {WriteChars: loop over stage buffer} { +test io-3.4 {WriteChars: loop over stage buffer} nodep { # stage buffer maps to more than can be queued at once. set f [open $path(test1) w] @@ -278,7 +279,7 @@ test io-3.4 {WriteChars: loop over stage buffer} { close $f lappend x [contents $path(test1)] } [list "!)!)!)!)!)!)!)!)" "!)!)!)!)!)!)!)!)!)!)!)!)!)!)!)"] -test io-3.5 {WriteChars: saved != 0} { +test io-3.5 {WriteChars: saved != 0} nodep { # Bytes produced by UtfToExternal from end of last channel buffer # had to be moved to beginning of next channel buffer to preserve # requested buffersize. @@ -307,7 +308,7 @@ test io-3.6 {WriteChars: (stageRead + dstWrote == 0)} { close $f lappend x [contents $path(test1)] } [list "12345678901234\x82\x60" "12345678901234\x82\x60\x82\x61"] -test io-3.7 {WriteChars: (bufPtr->nextAdded > bufPtr->length)} { +test io-3.7 {WriteChars: (bufPtr->nextAdded > bufPtr->length)} nodep { # When translating UTF-8 to external, the produced bytes went past end # of the channel buffer. This is done purpose -- we then truncate the # bytes at the end of the partial character to preserve the requested @@ -1532,7 +1533,7 @@ test io-12.8 {ReadChars: multibyte chars split} { close $f scan [string index $in end] %c } 160 -test io-12.9 {ReadChars: multibyte chars split} { +test io-12.9 {ReadChars: multibyte chars split} nodep { set f [open $path(test1) w] fconfigure $f -translation binary puts -nonewline $f [string repeat a 9]\xC2 @@ -1543,7 +1544,7 @@ test io-12.9 {ReadChars: multibyte chars split} { close $f scan [string index $in end] %c } 194 -test io-12.10 {ReadChars: multibyte chars split} { +test io-12.10 {ReadChars: multibyte chars split} nodep { set f [open $path(test1) w] fconfigure $f -translation binary puts -nonewline $f [string repeat a 9]\xC2 diff --git a/tests/main.test b/tests/main.test index 2d3f63c..1480bc2 100644 --- a/tests/main.test +++ b/tests/main.test @@ -5,6 +5,8 @@ if {"::tcltest" ni [namespace children]} { namespace import -force ::tcltest::* } +testConstraint nodep [info exists tcl_precision] + namespace eval ::tcl::test::main { namespace import ::tcltest::* @@ -143,7 +145,7 @@ namespace eval ::tcl::test::main { test Tcl_Main-1.8 { Tcl_Main: startup script - -encoding option - mismatched encodings } -constraints { - stdio + stdio nodep } -setup { set script [makeFile {} script] file delete $script diff --git a/tests/safe.test b/tests/safe.test index e2a9b83..b6668d7 100644 --- a/tests/safe.test +++ b/tests/safe.test @@ -1267,7 +1267,7 @@ test safe-11.7 {testing safe encoding} -setup { interp eval $i encoding convertfrom } -returnCodes error -cleanup { safe::interpDelete $i -} -result {wrong # args: should be "encoding convertfrom ?encoding? data ?-stoponerror?"} +} -result {wrong # args: should be "encoding convertfrom ?encoding? data ?-stoponerror|-nothrow?"} test safe-11.7.1 {testing safe encoding} -setup { set i [safe::interpCreate] } -body { @@ -1276,7 +1276,7 @@ test safe-11.7.1 {testing safe encoding} -setup { } -returnCodes ok -match glob -cleanup { unset -nocomplain m o safe::interpDelete $i -} -result {wrong # args: should be "encoding convertfrom ?encoding? data ?-stoponerror?" +} -result {wrong # args: should be "encoding convertfrom ?encoding? data ?-stoponerror|-nothrow?" while executing "encoding convertfrom" invoked from within @@ -1289,7 +1289,7 @@ test safe-11.8 {testing safe encoding} -setup { interp eval $i encoding convertto } -returnCodes error -cleanup { safe::interpDelete $i -} -result {wrong # args: should be "encoding convertto ?encoding? data ?-stoponerror?"} +} -result {wrong # args: should be "encoding convertto ?encoding? data ?-stoponerror|-nothrow?"} test safe-11.8.1 {testing safe encoding} -setup { set i [safe::interpCreate] } -body { @@ -1298,7 +1298,7 @@ test safe-11.8.1 {testing safe encoding} -setup { } -returnCodes ok -match glob -cleanup { unset -nocomplain m o safe::interpDelete $i -} -result {wrong # args: should be "encoding convertto ?encoding? data ?-stoponerror?" +} -result {wrong # args: should be "encoding convertto ?encoding? data ?-stoponerror|-nothrow?" while executing "encoding convertto" invoked from within diff --git a/tests/source.test b/tests/source.test index eee03ec..1748a70 100644 --- a/tests/source.test +++ b/tests/source.test @@ -20,6 +20,8 @@ if {[catch {package require tcltest 2.5}]} { namespace eval ::tcl::test::source { namespace import ::tcltest::* +testConstraint nodep [info exists tcl_precision] + test source-1.1 {source command} -setup { set x "old x value" set y "old y value" @@ -275,7 +277,7 @@ test source-7.5 {source -encoding: correct operation} -setup { removeFile source.file rename € {} } -result foo -test source-7.6 {source -encoding: mismatch encoding error} -setup { +test source-7.6 {source -encoding: mismatch encoding error} -constraints nodep -setup { set sourcefile [makeFile {} source.file] file delete $sourcefile set f [open $sourcefile w] -- cgit v0.12 From 684b9f01af31b898f57e7f05934043893186afc2 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Thu, 1 Apr 2021 14:55:22 +0000 Subject: Set errorcode for STOPONERROR --- generic/tclCmdAH.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/generic/tclCmdAH.c b/generic/tclCmdAH.c index ca8e939..cb5ef01 100644 --- a/generic/tclCmdAH.c +++ b/generic/tclCmdAH.c @@ -600,8 +600,12 @@ EncodingConvertfromObjCmd( result = Tcl_ExternalToUtfDStringEx(encoding, bytesPtr, length, stopOnError ? TCL_ENCODING_STOPONERROR : TCL_ENCODING_NO_THROW, &ds); if (stopOnError && (result != (size_t)-1)) { - Tcl_SetObjResult(interp, Tcl_ObjPrintf("unexpected byte at index %" - TCL_LL_MODIFIER "u: '%c' (\\x%X)", (long long)result, UCHAR(bytesPtr[result]), UCHAR(bytesPtr[result]))); + char buf[TCL_INTEGER_SPACE]; + sprintf(buf, "%" TCL_Z_MODIFIER "u", result); + Tcl_SetObjResult(interp, Tcl_ObjPrintf("unexpected byte at index %" + TCL_Z_MODIFIER "u: '%c' (\\x%X)", result, UCHAR(bytesPtr[result]), UCHAR(bytesPtr[result]))); + Tcl_SetErrorCode(interp, "TCL", "ENCODING", "STOPONERROR", + buf, NULL); Tcl_DStringFree(&ds); return TCL_ERROR; } @@ -696,9 +700,13 @@ EncodingConverttoObjCmd( if (stopOnError && (result != (size_t)-1)) { size_t pos = Tcl_NumUtfChars(stringPtr, result); int ucs4; + char buf[TCL_INTEGER_SPACE]; TclUtfToUCS4(&stringPtr[result], &ucs4); + sprintf(buf, "%" TCL_Z_MODIFIER "u", result); Tcl_SetObjResult(interp, Tcl_ObjPrintf("unexpected character at index %" - TCL_LL_MODIFIER "u: '%c' (U+%06X)", (long long)pos, ucs4, ucs4)); + TCL_Z_MODIFIER "u: '%c' (U+%06X)", pos, ucs4, ucs4)); + Tcl_SetErrorCode(interp, "TCL", "ENCODING", "STOPONERROR", + buf, NULL); Tcl_DStringFree(&ds); return TCL_ERROR; } -- cgit v0.12 From 06c51c6b90d0f09d4b7cebd7a4018e9ca5dacd9f Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Fri, 30 Apr 2021 13:39:35 +0000 Subject: More test-cases --- tests/encoding.test | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/tests/encoding.test b/tests/encoding.test index 195fc25..5471e0b 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -600,7 +600,33 @@ test encoding-24.10 {Parse valid or invalid utf-8} { test encoding-24.11 {Parse valid or invalid utf-8} { string length [encoding convertfrom utf-8 "\xEF\xBF\xBF" -nothrow] } 1 - +test encoding-24.12 {Parse valid or invalid utf-8} { + string length [encoding convertfrom utf-8 "\xC0\x80" -stoponerror] +} 1 +test encoding-24.13 {Parse valid or invalid utf-8} -body { + encoding convertfrom utf-8 "\xC0\x81" -stoponerror +} -returnCodes 1 -result {unexpected byte at index 0: 'À' (\xC0)} +test encoding-24.14 {Parse valid or invalid utf-8} -body { + encoding convertfrom utf-8 "\xC1\xBF" -stoponerror +} -returnCodes 1 -result {unexpected byte at index 0: 'Á' (\xC1)} +test encoding-24.15 {Parse valid or invalid utf-8} { + string length [encoding convertfrom utf-8 "\xC2\x80" -stoponerror] +} 1 +test encoding-24.16 {Parse valid or invalid utf-8} -body { + encoding convertfrom utf-8 "Z\xE0\x80" -stoponerror +} -returnCodes 1 -result {unexpected byte at index 1: 'à' (\xE0)} +test encoding-24.17 {Parse valid or invalid utf-8} -constraints testbytestring -body { + encoding convertto utf-8 [testbytestring "Z\u4343\x80"] -stoponerror +} -returnCodes 1 -result {expected byte sequence but character 1 was '䍃€' (U+004343)} +test encoding-24.18 {Parse valid or invalid utf-8} -constraints testbytestring -body { + encoding convertto utf-8 [testbytestring "Z\xE0\x80"] -stoponerror +} -result "Z\xC3\xA0\xE2\x82\xAC" +test encoding-24.19 {Parse valid or invalid utf-8} -constraints testbytestring -body { + encoding convertto utf-8 [testbytestring "Z\xE0\x80xxxxxx"] -stoponerror +} -result "Z\xC3\xA0\xE2\x82\xACxxxxxx" +test encoding-24.20 {Parse valid or invalid utf-8} -constraints testbytestring -body { + encoding convertto utf-8 "ZX\uD800" -stoponerror +} -returnCodes 1 -match glob -result "unexpected character at index 2: '\uD800' (U+00D800)" file delete [file join [temporaryDirectory] iso2022.txt] # -- cgit v0.12 From 9e781ffb02d3f384c1123ddcb6f96944cc4dc3ef Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Thu, 6 May 2021 11:35:39 +0000 Subject: Make ?-stoponerror|-nothrow? argument first in stead of last for encoding convertto/convertfrom --- generic/tclCmdAH.c | 16 +++++++-------- tests/cmdAH.test | 4 ++-- tests/encoding.test | 58 ++++++++++++++++++++++++++--------------------------- tests/safe.test | 8 ++++---- 4 files changed, 43 insertions(+), 43 deletions(-) diff --git a/generic/tclCmdAH.c b/generic/tclCmdAH.c index cb5ef01..682ba3f 100644 --- a/generic/tclCmdAH.c +++ b/generic/tclCmdAH.c @@ -557,12 +557,12 @@ EncodingConvertfromObjCmd( encoding = Tcl_GetEncoding(interp, NULL); data = objv[1]; } else if ((unsigned)(objc - 3) < 2) { - if (Tcl_GetEncodingFromObj(interp, objv[1], &encoding) != TCL_OK) { + if (Tcl_GetEncodingFromObj(interp, objv[objc - 2], &encoding) != TCL_OK) { return TCL_ERROR; } - data = objv[2]; + data = objv[objc - 1]; if (objc > 3) { - stopOnError = Tcl_GetString(objv[3]); + stopOnError = Tcl_GetString(objv[1]); if (!stopOnError[0]) { #if TCL_MAJOR_VERSION < 9 && !defined(TCL_NO_DEPRECATED) stopOnError = NULL; @@ -582,7 +582,7 @@ EncodingConvertfromObjCmd( } } else { encConvFromError: - Tcl_WrongNumArgs(interp, 1, objv, "?encoding? data ?-stoponerror|-nothrow?"); + Tcl_WrongNumArgs(interp, 1, objv, "?-stoponerror|-nothrow? ?encoding? data"); return TCL_ERROR; } @@ -661,12 +661,12 @@ EncodingConverttoObjCmd( encoding = Tcl_GetEncoding(interp, NULL); data = objv[1]; } else if ((unsigned)(objc - 3) < 2) { - if (Tcl_GetEncodingFromObj(interp, objv[1], &encoding) != TCL_OK) { + if (Tcl_GetEncodingFromObj(interp, objv[objc - 2], &encoding) != TCL_OK) { return TCL_ERROR; } - data = objv[2]; + data = objv[objc - 1]; if (objc > 3) { - stopOnError = Tcl_GetString(objv[3]); + stopOnError = Tcl_GetString(objv[1]); if (!stopOnError[0]) { #if TCL_MAJOR_VERSION < 9 && !defined(TCL_NO_DEPRECATED) stopOnError = NULL; @@ -686,7 +686,7 @@ EncodingConverttoObjCmd( } } else { encConvToError: - Tcl_WrongNumArgs(interp, 1, objv, "?encoding? data ?-stoponerror|-nothrow?"); + Tcl_WrongNumArgs(interp, 1, objv, "?-stoponerror|-nothrow? ?encoding? data"); return TCL_ERROR; } diff --git a/tests/cmdAH.test b/tests/cmdAH.test index e9973a9..5cf8fac 100644 --- a/tests/cmdAH.test +++ b/tests/cmdAH.test @@ -178,7 +178,7 @@ test cmdAH-4.2 {Tcl_EncodingObjCmd} -returnCodes error -body { } -result {unknown or ambiguous subcommand "foo": must be convertfrom, convertto, dirs, names, or system} test cmdAH-4.3 {Tcl_EncodingObjCmd} -returnCodes error -body { encoding convertto -} -result {wrong # args: should be "encoding convertto ?encoding? data ?-stoponerror|-nothrow?"} +} -result {wrong # args: should be "encoding convertto ?-stoponerror|-nothrow? ?encoding? data"} test cmdAH-4.4 {Tcl_EncodingObjCmd} -returnCodes error -body { encoding convertto foo bar } -result {unknown encoding "foo"} @@ -200,7 +200,7 @@ test cmdAH-4.6 {Tcl_EncodingObjCmd} -setup { } -result 8C test cmdAH-4.7 {Tcl_EncodingObjCmd} -returnCodes error -body { encoding convertfrom -} -result {wrong # args: should be "encoding convertfrom ?encoding? data ?-stoponerror|-nothrow?"} +} -result {wrong # args: should be "encoding convertfrom ?-stoponerror|-nothrow? ?encoding? data"} test cmdAH-4.8 {Tcl_EncodingObjCmd} -returnCodes error -body { encoding convertfrom foo bar } -result {unknown encoding "foo"} diff --git a/tests/encoding.test b/tests/encoding.test index 5471e0b..91fb1ec 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -291,7 +291,7 @@ test encoding-11.9 {encoding: extended Unicode UTF-16} { test encoding-12.1 {LoadTableEncoding: normal encoding} { set x [encoding convertto iso8859-3 Ġ] - append x [encoding convertto iso8859-3 Õ -nothrow] + append x [encoding convertto -nothrow iso8859-3 Õ] append x [encoding convertfrom iso8859-3 Õ] } "Õ?Ġ" test encoding-12.2 {LoadTableEncoding: single-byte encoding} { @@ -340,67 +340,67 @@ test encoding-15.5 {UtfToUtfProc emoji character input} { } "4 😂" test encoding-15.6 {UtfToUtfProc emoji character output} { set x \uDE02\uD83D\uDE02\uD83D - set y [encoding convertto utf-8 \uDE02\uD83D\uDE02\uD83D -nothrow] + set y [encoding convertto -nothrow utf-8 \uDE02\uD83D\uDE02\uD83D] binary scan $y H* z list [string length $y] $z } {10 efbfbdf09f9882efbfbd} test encoding-15.7 {UtfToUtfProc emoji character output} { set x \uDE02\uD83D\uD83D - set y [encoding convertto utf-8 \uDE02\uD83D\uD83D -nothrow] + set y [encoding convertto -nothrow utf-8 \uDE02\uD83D\uD83D] binary scan $y H* z list [string length $x] [string length $y] $z } {3 9 efbfbdefbfbdefbfbd} test encoding-15.8 {UtfToUtfProc emoji character output} { set x \uDE02\uD83Dé - set y [encoding convertto utf-8 \uDE02\uD83Dé -nothrow] + set y [encoding convertto -nothrow utf-8 \uDE02\uD83Dé] binary scan $y H* z list [string length $x] [string length $y] $z } {3 8 efbfbdefbfbdc3a9} test encoding-15.9 {UtfToUtfProc emoji character output} { set x \uDE02\uD83DX - set y [encoding convertto utf-8 \uDE02\uD83DX -nothrow] + set y [encoding convertto -nothrow utf-8 \uDE02\uD83DX] binary scan $y H* z list [string length $x] [string length $y] $z } {3 7 efbfbdefbfbd58} test encoding-15.10 {UtfToUtfProc high surrogate character output} { set x \uDE02é - set y [encoding convertto utf-8 \uDE02é -nothrow] + set y [encoding convertto -nothrow utf-8 \uDE02é] binary scan $y H* z list [string length $x] [string length $y] $z } {2 5 efbfbdc3a9} test encoding-15.11 {UtfToUtfProc low surrogate character output} { set x \uDA02é - set y [encoding convertto utf-8 \uDA02é -nothrow] + set y [encoding convertto -nothrow utf-8 \uDA02é] binary scan $y H* z list [string length $x] [string length $y] $z } {2 5 efbfbdc3a9} test encoding-15.12 {UtfToUtfProc high surrogate character output} { set x \uDE02Y - set y [encoding convertto utf-8 \uDE02Y -nothrow] + set y [encoding convertto -nothrow utf-8 \uDE02Y] binary scan $y H* z list [string length $x] [string length $y] $z } {2 4 efbfbd59} test encoding-15.13 {UtfToUtfProc low surrogate character output} { set x \uDA02Y - set y [encoding convertto utf-8 \uDA02Y -nothrow] + set y [encoding convertto -nothrow utf-8 \uDA02Y] binary scan $y H* z list [string length $x] [string length $y] $z } {2 4 efbfbd59} test encoding-15.14 {UtfToUtfProc high surrogate character output} { set x \uDE02 - set y [encoding convertto utf-8 \uDE02 -nothrow] + set y [encoding convertto -nothrow utf-8 \uDE02] binary scan $y H* z list [string length $x] [string length $y] $z } {1 3 efbfbd} test encoding-15.15 {UtfToUtfProc low surrogate character output} { set x \uDA02 - set y [encoding convertto utf-8 \uDA02 -nothrow] + set y [encoding convertto -nothrow utf-8 \uDA02] binary scan $y H* z list [string length $x] [string length $y] $z } {1 3 efbfbd} test encoding-15.16 {UtfToUtfProc: Invalid 4-byte UTF-8, see [ed29806ba]} { set x \xF0\xA0\xA1\xC2 - set y [encoding convertfrom utf-8 \xF0\xA0\xA1\xC2 -nothrow] + set y [encoding convertfrom -nothrow utf-8 \xF0\xA0\xA1\xC2] list [string length $x] $y } "4 \xF0\xA0\xA1\xC2" test encoding-15.17 {UtfToUtfProc emoji character output} { @@ -458,10 +458,10 @@ test encoding-17.2 {UtfToUcs2Proc} -body { encoding convertfrom utf-16 [encoding convertto ucs-2 "\U460DC"] } -result "\uFFFD" test encoding-17.3 {UtfToUtf16Proc} -body { - encoding convertto utf-16be "\uDCDC" -nothrow + encoding convertto -nothrow utf-16be "\uDCDC" } -result "\xFF\xFD" test encoding-17.4 {UtfToUtf16Proc} -body { - encoding convertto utf-16le "\uD8D8" -nothrow + encoding convertto -nothrow utf-16le "\uD8D8" } -result "\xFD\xFF" test encoding-18.1 {TableToUtfProc} { @@ -580,52 +580,52 @@ test encoding-24.4 {Parse valid or invalid utf-8} { string length [encoding convertfrom utf-8 "\xC0\x80"] } 1 test encoding-24.5 {Parse valid or invalid utf-8} { - string length [encoding convertfrom utf-8 "\xC0\x81" -nothrow] + string length [encoding convertfrom -nothrow utf-8 "\xC0\x81"] } 2 test encoding-24.6 {Parse valid or invalid utf-8} { - string length [encoding convertfrom utf-8 "\xC1\xBF" -nothrow] + string length [encoding convertfrom -nothrow utf-8 "\xC1\xBF"] } 2 test encoding-24.7 {Parse valid or invalid utf-8} { string length [encoding convertfrom utf-8 "\xC2\x80"] } 1 test encoding-24.8 {Parse valid or invalid utf-8} { - string length [encoding convertfrom utf-8 "\xE0\x80\x80" -nothrow] + string length [encoding convertfrom -nothrow utf-8 "\xE0\x80\x80"] } 3 test encoding-24.9 {Parse valid or invalid utf-8} { - string length [encoding convertfrom utf-8 "\xE0\x9F\xBF" -nothrow] + string length [encoding convertfrom -nothrow utf-8 "\xE0\x9F\xBF"] } 3 test encoding-24.10 {Parse valid or invalid utf-8} { string length [encoding convertfrom utf-8 "\xE0\xA0\x80"] } 1 test encoding-24.11 {Parse valid or invalid utf-8} { - string length [encoding convertfrom utf-8 "\xEF\xBF\xBF" -nothrow] + string length [encoding convertfrom -nothrow utf-8 "\xEF\xBF\xBF"] } 1 test encoding-24.12 {Parse valid or invalid utf-8} { - string length [encoding convertfrom utf-8 "\xC0\x80" -stoponerror] + string length [encoding convertfrom -stoponerror utf-8 "\xC0\x80"] } 1 test encoding-24.13 {Parse valid or invalid utf-8} -body { - encoding convertfrom utf-8 "\xC0\x81" -stoponerror + encoding convertfrom -stoponerror utf-8 "\xC0\x81" } -returnCodes 1 -result {unexpected byte at index 0: 'À' (\xC0)} test encoding-24.14 {Parse valid or invalid utf-8} -body { - encoding convertfrom utf-8 "\xC1\xBF" -stoponerror + encoding convertfrom -stoponerror utf-8 "\xC1\xBF" } -returnCodes 1 -result {unexpected byte at index 0: 'Á' (\xC1)} test encoding-24.15 {Parse valid or invalid utf-8} { - string length [encoding convertfrom utf-8 "\xC2\x80" -stoponerror] + string length [encoding convertfrom -stoponerror utf-8 "\xC2\x80"] } 1 test encoding-24.16 {Parse valid or invalid utf-8} -body { - encoding convertfrom utf-8 "Z\xE0\x80" -stoponerror + encoding convertfrom -stoponerror utf-8 "Z\xE0\x80" } -returnCodes 1 -result {unexpected byte at index 1: 'à' (\xE0)} test encoding-24.17 {Parse valid or invalid utf-8} -constraints testbytestring -body { - encoding convertto utf-8 [testbytestring "Z\u4343\x80"] -stoponerror + encoding convertto -stoponerror utf-8 [testbytestring "Z\u4343\x80"] } -returnCodes 1 -result {expected byte sequence but character 1 was '䍃€' (U+004343)} test encoding-24.18 {Parse valid or invalid utf-8} -constraints testbytestring -body { - encoding convertto utf-8 [testbytestring "Z\xE0\x80"] -stoponerror + encoding convertto -stoponerror utf-8 [testbytestring "Z\xE0\x80"] } -result "Z\xC3\xA0\xE2\x82\xAC" test encoding-24.19 {Parse valid or invalid utf-8} -constraints testbytestring -body { - encoding convertto utf-8 [testbytestring "Z\xE0\x80xxxxxx"] -stoponerror + encoding convertto -stoponerror utf-8 [testbytestring "Z\xE0\x80xxxxxx"] } -result "Z\xC3\xA0\xE2\x82\xACxxxxxx" test encoding-24.20 {Parse valid or invalid utf-8} -constraints testbytestring -body { - encoding convertto utf-8 "ZX\uD800" -stoponerror + encoding convertto -stoponerror utf-8 "ZX\uD800" } -returnCodes 1 -match glob -result "unexpected character at index 2: '\uD800' (U+00D800)" file delete [file join [temporaryDirectory] iso2022.txt] @@ -781,7 +781,7 @@ test encoding-28.0 {all encodings load} -body { set string hello foreach name [encoding names] { incr count - encoding convertto $name $string -nothrow + encoding convertto -nothrow $name $string # discard the cached internal representation of Tcl_Encoding # Unfortunately, without this, encoding 2-1 fails. diff --git a/tests/safe.test b/tests/safe.test index e7e427b..2ea32f5 100644 --- a/tests/safe.test +++ b/tests/safe.test @@ -1269,7 +1269,7 @@ test safe-11.7 {testing safe encoding} -setup { interp eval $i encoding convertfrom } -returnCodes error -cleanup { safe::interpDelete $i -} -result {wrong # args: should be "encoding convertfrom ?encoding? data ?-stoponerror|-nothrow?"} +} -result {wrong # args: should be "encoding convertfrom ?-stoponerror|-nothrow? ?encoding? data"} test safe-11.7.1 {testing safe encoding} -setup { set i [safe::interpCreate] } -body { @@ -1278,7 +1278,7 @@ test safe-11.7.1 {testing safe encoding} -setup { } -returnCodes ok -match glob -cleanup { unset -nocomplain m o safe::interpDelete $i -} -result {wrong # args: should be "encoding convertfrom ?encoding? data ?-stoponerror|-nothrow?" +} -result {wrong # args: should be "encoding convertfrom ?-stoponerror|-nothrow? ?encoding? data" while executing "encoding convertfrom" invoked from within @@ -1291,7 +1291,7 @@ test safe-11.8 {testing safe encoding} -setup { interp eval $i encoding convertto } -returnCodes error -cleanup { safe::interpDelete $i -} -result {wrong # args: should be "encoding convertto ?encoding? data ?-stoponerror|-nothrow?"} +} -result {wrong # args: should be "encoding convertto ?-stoponerror|-nothrow? ?encoding? data"} test safe-11.8.1 {testing safe encoding} -setup { set i [safe::interpCreate] } -body { @@ -1300,7 +1300,7 @@ test safe-11.8.1 {testing safe encoding} -setup { } -returnCodes ok -match glob -cleanup { unset -nocomplain m o safe::interpDelete $i -} -result {wrong # args: should be "encoding convertto ?encoding? data ?-stoponerror|-nothrow?" +} -result {wrong # args: should be "encoding convertto ?-stoponerror|-nothrow? ?encoding? data" while executing "encoding convertto" invoked from within -- cgit v0.12 From 82b2bfa1b8f90760f53b543c9dc7e4fa7c2e3510 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Thu, 6 May 2021 14:16:58 +0000 Subject: Remove character/byte value from error-message, only use hex here. --- generic/tclCmdAH.c | 4 ++-- tests/encoding.test | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/generic/tclCmdAH.c b/generic/tclCmdAH.c index 682ba3f..1361f11 100644 --- a/generic/tclCmdAH.c +++ b/generic/tclCmdAH.c @@ -603,7 +603,7 @@ EncodingConvertfromObjCmd( char buf[TCL_INTEGER_SPACE]; sprintf(buf, "%" TCL_Z_MODIFIER "u", result); Tcl_SetObjResult(interp, Tcl_ObjPrintf("unexpected byte at index %" - TCL_Z_MODIFIER "u: '%c' (\\x%X)", result, UCHAR(bytesPtr[result]), UCHAR(bytesPtr[result]))); + TCL_Z_MODIFIER "u: '\\x%X'", result, UCHAR(bytesPtr[result]))); Tcl_SetErrorCode(interp, "TCL", "ENCODING", "STOPONERROR", buf, NULL); Tcl_DStringFree(&ds); @@ -704,7 +704,7 @@ EncodingConverttoObjCmd( TclUtfToUCS4(&stringPtr[result], &ucs4); sprintf(buf, "%" TCL_Z_MODIFIER "u", result); Tcl_SetObjResult(interp, Tcl_ObjPrintf("unexpected character at index %" - TCL_Z_MODIFIER "u: '%c' (U+%06X)", pos, ucs4, ucs4)); + TCL_Z_MODIFIER "u: 'U+%06X'", pos, ucs4)); Tcl_SetErrorCode(interp, "TCL", "ENCODING", "STOPONERROR", buf, NULL); Tcl_DStringFree(&ds); diff --git a/tests/encoding.test b/tests/encoding.test index 91fb1ec..355c2ec 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -605,16 +605,16 @@ test encoding-24.12 {Parse valid or invalid utf-8} { } 1 test encoding-24.13 {Parse valid or invalid utf-8} -body { encoding convertfrom -stoponerror utf-8 "\xC0\x81" -} -returnCodes 1 -result {unexpected byte at index 0: 'À' (\xC0)} +} -returnCodes 1 -result {unexpected byte at index 0: '\xC0'} test encoding-24.14 {Parse valid or invalid utf-8} -body { encoding convertfrom -stoponerror utf-8 "\xC1\xBF" -} -returnCodes 1 -result {unexpected byte at index 0: 'Á' (\xC1)} +} -returnCodes 1 -result {unexpected byte at index 0: '\xC1'} test encoding-24.15 {Parse valid or invalid utf-8} { string length [encoding convertfrom -stoponerror utf-8 "\xC2\x80"] } 1 test encoding-24.16 {Parse valid or invalid utf-8} -body { encoding convertfrom -stoponerror utf-8 "Z\xE0\x80" -} -returnCodes 1 -result {unexpected byte at index 1: 'à' (\xE0)} +} -returnCodes 1 -result {unexpected byte at index 1: '\xE0'} test encoding-24.17 {Parse valid or invalid utf-8} -constraints testbytestring -body { encoding convertto -stoponerror utf-8 [testbytestring "Z\u4343\x80"] } -returnCodes 1 -result {expected byte sequence but character 1 was '䍃€' (U+004343)} @@ -626,7 +626,7 @@ test encoding-24.19 {Parse valid or invalid utf-8} -constraints testbytestring - } -result "Z\xC3\xA0\xE2\x82\xACxxxxxx" test encoding-24.20 {Parse valid or invalid utf-8} -constraints testbytestring -body { encoding convertto -stoponerror utf-8 "ZX\uD800" -} -returnCodes 1 -match glob -result "unexpected character at index 2: '\uD800' (U+00D800)" +} -returnCodes 1 -match glob -result "unexpected character at index 2: 'U+00D800'" file delete [file join [temporaryDirectory] iso2022.txt] # -- cgit v0.12 From 99994365ea8c04611e93f3108f4a7d8d4e1ca49f Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Fri, 7 May 2021 09:50:13 +0000 Subject: Parse simplifications and better errormessage. Not 100% correct yet --- generic/tclCmdAH.c | 36 +++++++++++++----------------------- tests/encoding.test | 6 +++--- 2 files changed, 16 insertions(+), 26 deletions(-) diff --git a/generic/tclCmdAH.c b/generic/tclCmdAH.c index 1361f11..9cd8c12 100644 --- a/generic/tclCmdAH.c +++ b/generic/tclCmdAH.c @@ -550,24 +550,24 @@ EncodingConvertfromObjCmd( Tcl_Encoding encoding; /* Encoding to use */ int length; /* Length of the byte array being converted */ const char *bytesPtr; /* Pointer to the first byte of the array */ +#if TCL_MAJOR_VERSION > 8 || defined(TCL_NO_DEPRECATED) + const char *stopOnError = ""; +#else const char *stopOnError = NULL; +#endif size_t result; if (objc == 2) { encoding = Tcl_GetEncoding(interp, NULL); data = objv[1]; - } else if ((unsigned)(objc - 3) < 2) { + } else if ((unsigned)(objc - 2) < 3) { if (Tcl_GetEncodingFromObj(interp, objv[objc - 2], &encoding) != TCL_OK) { return TCL_ERROR; } data = objv[objc - 1]; if (objc > 3) { stopOnError = Tcl_GetString(objv[1]); - if (!stopOnError[0]) { -#if TCL_MAJOR_VERSION < 9 && !defined(TCL_NO_DEPRECATED) - stopOnError = NULL; -#endif - } else if (stopOnError[0] == '-' && stopOnError[1] == 'n' + if (stopOnError[0] == '-' && stopOnError[1] == 'n' && !strncmp(stopOnError, "-nothrow", strlen(stopOnError))) { stopOnError = NULL; } else if (stopOnError[0] == '-' && stopOnError[1] == 's' @@ -575,10 +575,6 @@ EncodingConvertfromObjCmd( } else { goto encConvFromError; } -#if TCL_MAJOR_VERSION > 8 || defined(TCL_NO_DEPRECATED) - } else { - stopOnError = ""; -#endif } } else { encConvFromError: @@ -602,7 +598,7 @@ EncodingConvertfromObjCmd( if (stopOnError && (result != (size_t)-1)) { char buf[TCL_INTEGER_SPACE]; sprintf(buf, "%" TCL_Z_MODIFIER "u", result); - Tcl_SetObjResult(interp, Tcl_ObjPrintf("unexpected byte at index %" + Tcl_SetObjResult(interp, Tcl_ObjPrintf("unexpected byte sequence starting at index %" TCL_Z_MODIFIER "u: '\\x%X'", result, UCHAR(bytesPtr[result]))); Tcl_SetErrorCode(interp, "TCL", "ENCODING", "STOPONERROR", buf, NULL); @@ -653,25 +649,23 @@ EncodingConverttoObjCmd( int length; /* Length of the string being converted */ const char *stringPtr; /* Pointer to the first byte of the string */ size_t result; +#if TCL_MAJOR_VERSION > 8 || defined(TCL_NO_DEPRECATED) + const char *stopOnError = ""; +#else const char *stopOnError = NULL; - - /* TODO - ADJUST OBJ INDICES WHEN ENSEMBLIFYING THIS */ +#endif if (objc == 2) { encoding = Tcl_GetEncoding(interp, NULL); data = objv[1]; - } else if ((unsigned)(objc - 3) < 2) { + } else if ((unsigned)(objc - 2) < 3) { if (Tcl_GetEncodingFromObj(interp, objv[objc - 2], &encoding) != TCL_OK) { return TCL_ERROR; } data = objv[objc - 1]; if (objc > 3) { stopOnError = Tcl_GetString(objv[1]); - if (!stopOnError[0]) { -#if TCL_MAJOR_VERSION < 9 && !defined(TCL_NO_DEPRECATED) - stopOnError = NULL; -#endif - } else if (stopOnError[0] == '-' && stopOnError[1] == 'n' + if (stopOnError[0] == '-' && stopOnError[1] == 'n' && !strncmp(stopOnError, "-nothrow", strlen(stopOnError))) { stopOnError = NULL; } else if (stopOnError[0] == '-' && stopOnError[1] == 's' @@ -679,10 +673,6 @@ EncodingConverttoObjCmd( } else { goto encConvToError; } -#if TCL_MAJOR_VERSION > 8 || defined(TCL_NO_DEPRECATED) - } else { - stopOnError = ""; -#endif } } else { encConvToError: diff --git a/tests/encoding.test b/tests/encoding.test index 355c2ec..d30ef60 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -605,16 +605,16 @@ test encoding-24.12 {Parse valid or invalid utf-8} { } 1 test encoding-24.13 {Parse valid or invalid utf-8} -body { encoding convertfrom -stoponerror utf-8 "\xC0\x81" -} -returnCodes 1 -result {unexpected byte at index 0: '\xC0'} +} -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xC0'} test encoding-24.14 {Parse valid or invalid utf-8} -body { encoding convertfrom -stoponerror utf-8 "\xC1\xBF" -} -returnCodes 1 -result {unexpected byte at index 0: '\xC1'} +} -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xC1'} test encoding-24.15 {Parse valid or invalid utf-8} { string length [encoding convertfrom -stoponerror utf-8 "\xC2\x80"] } 1 test encoding-24.16 {Parse valid or invalid utf-8} -body { encoding convertfrom -stoponerror utf-8 "Z\xE0\x80" -} -returnCodes 1 -result {unexpected byte at index 1: '\xE0'} +} -returnCodes 1 -result {unexpected byte sequence starting at index 1: '\xE0'} test encoding-24.17 {Parse valid or invalid utf-8} -constraints testbytestring -body { encoding convertto -stoponerror utf-8 [testbytestring "Z\u4343\x80"] } -returnCodes 1 -result {expected byte sequence but character 1 was '䍃€' (U+004343)} -- cgit v0.12 From c1591561bfc41b9b4bd3f4bf09929d419325c9ef Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Fri, 7 May 2021 13:44:53 +0000 Subject: doc fix --- doc/Encoding.3 | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/Encoding.3 b/doc/Encoding.3 index d853977..73ad65d 100644 --- a/doc/Encoding.3 +++ b/doc/Encoding.3 @@ -25,13 +25,13 @@ int char * \fBTcl_ExternalToUtfDString\fR(\fIencoding, src, srcLen, dstPtr\fR) .sp -char * +size_t \fBTcl_ExternalToUtfDStringEx\fR(\fIencoding, src, srcLen, flags, dstPtr\fR) .sp char * \fBTcl_UtfToExternalDString\fR(\fIencoding, src, srcLen, dstPtr\fR) .sp -char * +size_t \fBTcl_UtfToExternalDStringEx\fR(\fIencoding, src, srcLen, flags, dstPtr\fR) .sp int @@ -261,8 +261,8 @@ a pointer to the value stored in the DString. .PP \fBTcl_UtfToExternalDStringEx\fR is the same as \fBTcl_UtfToExternalDString\fR, but it has an additional flags parameter. The return value is the index of -the first byte in the input string causing a conversion error. -Or (size_t)-1 if all is OK. +the first byte of an utf-8 byte-sequence in the input string causing a +conversion error. Or (size_t)-1 if all is OK. .PP \fBTcl_UtfToExternal\fR converts a source buffer \fIsrc\fR from UTF-8 into the specified \fIencoding\fR. Up to \fIsrcLen\fR bytes are converted from -- cgit v0.12 From c19b90133a56c0adc06f764732d80720e60747a3 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Fri, 7 May 2021 13:55:14 +0000 Subject: Double definition of TCL_ENCODING_MODIFIED and another doc fix --- doc/string.n | 2 +- generic/tclEncoding.c | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/doc/string.n b/doc/string.n index 6a10da8..f3d7616 100644 --- a/doc/string.n +++ b/doc/string.n @@ -419,7 +419,7 @@ command to convert a string to a known encoding (e.g. "utf-8" or "cesu-8") and then apply \fBstring length\fR to that. .PP .CS -\fBstring length\fR [encoding convertto wtf-8 $theString] +\fBstring length\fR [encoding convertto utf-8 $theString] .CE .RE .TP diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index a53261e..17b00d6 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -512,7 +512,6 @@ FillEncodingFileMap(void) /* Since TCL_ENCODING_MODIFIED is only used for utf-8/cesu-8 and * TCL_ENCODING_LE is only used for utf-16/ucs-2. re-use the same value */ -#define TCL_ENCODING_MODIFIED 0x20 /* Converting NULL bytes to 0xC0 0x80 */ #define TCL_ENCODING_LE TCL_ENCODING_MODIFIED /* Little-endian encoding */ #define TCL_ENCODING_UTF 0x200 /* For UTF-8 encoding, allow 4-byte output sequences */ -- cgit v0.12 From 95cd48673472309ca5a790f3d26e4a137c010a6b Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Fri, 14 May 2021 10:52:53 +0000 Subject: One left-over wtf-8 mentioning, which is no longer part of TIP #597 --- generic/tcl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/generic/tcl.h b/generic/tcl.h index a5d0106..759adc9 100644 --- a/generic/tcl.h +++ b/generic/tcl.h @@ -2075,7 +2075,7 @@ typedef struct Tcl_EncodingType { * produced is controlled only by other limiting * factors. * TCL_ENCODING_MODIFIED - Convert NULL bytes to \xC0\x80 in stead of - * 0x00. Only valid for "utf-8", "wtf-8 and "cesu-8". + * 0x00. Only valid for "utf-8" and "cesu-8". * This flag is implicit for external -> internal conversions, * optional for internal -> external conversions. * TCL_ENCODING_NO_THROW - If set, the converter -- cgit v0.12 From b9bd6ffbf8851cfc23fefe5653355b201d12cf83 Mon Sep 17 00:00:00 2001 From: oehhar Date: Wed, 26 May 2021 06:44:40 +0000 Subject: TIP601 encoding stoponerror: document Tcl_ExternalToUtfDStringEx and Tcl_ExternalToUtfDStringEx --- generic/tclEncoding.c | 64 +++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 62 insertions(+), 2 deletions(-) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 17b00d6..1e56c12 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -1093,7 +1093,7 @@ Tcl_CreateEncoding( /* *------------------------------------------------------------------------- * - * Tcl_ExternalToUtfDString/Tcl_ExternalToUtfDStringEx -- + * Tcl_ExternalToUtfDString -- * * Convert a source buffer from the specified encoding into UTF-8. If any * of the bytes in the source buffer are invalid or cannot be represented @@ -1125,6 +1125,35 @@ Tcl_ExternalToUtfDString( return Tcl_DStringValue(dstPtr); } + +/* + *------------------------------------------------------------------------- + * + * Tcl_ExternalToUtfDStringEx -- + * + * Convert a source buffer from the specified encoding into UTF-8. +* The parameter flags controls the behavior, if any of the bytes in + * the source buffer are invalid or cannot be represented in utf-8. + * Possible flags values: + * TCLENCODINGSTOPONERROR: don't replace invalid characters/bytes but + * return the first error position (Default in Tcl 9.0). + * TCLENCODINGNO_THROW: replace invalid characters/bytes by a default + * fallback character. Always return -1 (Default in Tcl 8.7). + * TCLENCODINGMODIFIED: convert NULL bytes to \xC0\x80 in stead of 0x00. + * Only valid for "utf-8" and "cesu-8". This flag may be used together + * with the other flags. + * + * Results: + * The converted bytes are stored in the DString, which is then NULL + * terminated in an encoding-specific manner. The return value is + * the error position in the source string or -1 if no conversion error + * is reported. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------- + */ size_t Tcl_ExternalToUtfDStringEx( @@ -1303,7 +1332,7 @@ Tcl_ExternalToUtf( /* *------------------------------------------------------------------------- * - * Tcl_UtfToExternalDString/Tcl_UtfToExternalDStringEx -- + * Tcl_UtfToExternalDString -- * * Convert a source buffer from UTF-8 to the specified encoding. If any * of the bytes in the source buffer are invalid or cannot be represented @@ -1335,6 +1364,37 @@ Tcl_UtfToExternalDString( return Tcl_DStringValue(dstPtr); } + +/* + *------------------------------------------------------------------------- + * + * Tcl_UtfToExternalDStringEx -- + * + * Convert a source buffer from UTF-8 to the specified encoding. + * The parameter flags controls the behavior, if any of the bytes in + * the source buffer are invalid or cannot be represented in the + * target encoding. + * Possible flags values: + * TCLENCODINGSTOPONERROR: don't replace invalid characters/bytes but + * return the first error position (Default in Tcl 9.0). + * TCLENCODINGNO_THROW: replace invalid characters/bytes by a default + * fallback character. Always return -1 (Default in Tcl 8.7). + * TCLENCODINGMODIFIED: convert NULL bytes to \xC0\x80 in stead of 0x00. + * Only valid for "utf-8" and "cesu-8". This flag may be used together + * with the other flags. + * + * Results: + * The converted bytes are stored in the DString, which is then NULL + * terminated in an encoding-specific manner. The return value is + * the error position in the source string or -1 if no conversion error + * is reported. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------- + */ + size_t Tcl_UtfToExternalDStringEx( Tcl_Encoding encoding, /* The encoding for the converted string, or -- cgit v0.12 From 5ab98c9b65c66ac15cafc95755202ac31b237450 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Wed, 26 May 2021 08:36:28 +0000 Subject: Add underscores in flag names --- generic/tclEncoding.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 1e56c12..7a9f0b7 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -1135,11 +1135,11 @@ Tcl_ExternalToUtfDString( * The parameter flags controls the behavior, if any of the bytes in * the source buffer are invalid or cannot be represented in utf-8. * Possible flags values: - * TCLENCODINGSTOPONERROR: don't replace invalid characters/bytes but + * TCL_ENCODING_STOPONERROR: don't replace invalid characters/bytes but * return the first error position (Default in Tcl 9.0). - * TCLENCODINGNO_THROW: replace invalid characters/bytes by a default + * TCL_ENCODING_NO_THROW: replace invalid characters/bytes by a default * fallback character. Always return -1 (Default in Tcl 8.7). - * TCLENCODINGMODIFIED: convert NULL bytes to \xC0\x80 in stead of 0x00. + * TCL_ENCODING_MODIFIED: convert NULL bytes to \xC0\x80 in stead of 0x00. * Only valid for "utf-8" and "cesu-8". This flag may be used together * with the other flags. * @@ -1375,11 +1375,11 @@ Tcl_UtfToExternalDString( * the source buffer are invalid or cannot be represented in the * target encoding. * Possible flags values: - * TCLENCODINGSTOPONERROR: don't replace invalid characters/bytes but + * TCL_ENCODING_STOPONERROR: don't replace invalid characters/bytes but * return the first error position (Default in Tcl 9.0). - * TCLENCODINGNO_THROW: replace invalid characters/bytes by a default + * TCL_ENCODING_NO_THROW: replace invalid characters/bytes by a default * fallback character. Always return -1 (Default in Tcl 8.7). - * TCLENCODINGMODIFIED: convert NULL bytes to \xC0\x80 in stead of 0x00. + * TCL_ENCODING_MODIFIED: convert NULL bytes to \xC0\x80 in stead of 0x00. * Only valid for "utf-8" and "cesu-8". This flag may be used together * with the other flags. * -- cgit v0.12 From 588e5a48cb262a4fa3d60698be3f1d94434dfcf1 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Wed, 26 May 2021 14:04:00 +0000 Subject: Handle the situation when there is "-nothrow" or "-stoponerror" but without providing encoding --- generic/tclCmdAH.c | 77 +++++++++++++++++++++++++++++++---------------------- tests/encoding.test | 6 +++++ 2 files changed, 51 insertions(+), 32 deletions(-) diff --git a/generic/tclCmdAH.c b/generic/tclCmdAH.c index 0f9aa27..6549648 100644 --- a/generic/tclCmdAH.c +++ b/generic/tclCmdAH.c @@ -561,21 +561,26 @@ EncodingConvertfromObjCmd( encoding = Tcl_GetEncoding(interp, NULL); data = objv[1]; } else if ((unsigned)(objc - 2) < 3) { - if (Tcl_GetEncodingFromObj(interp, objv[objc - 2], &encoding) != TCL_OK) { - return TCL_ERROR; - } data = objv[objc - 1]; - if (objc > 3) { - bytesPtr = Tcl_GetString(objv[1]); - if (bytesPtr[0] == '-' && bytesPtr[1] == 'n' - && !strncmp(bytesPtr, "-nothrow", strlen(bytesPtr))) { - flags = TCL_ENCODING_NO_THROW; - } else if (bytesPtr[0] == '-' && bytesPtr[1] == 's' - && !strncmp(bytesPtr, "-stoponerror", strlen(bytesPtr))) { - flags = TCL_ENCODING_STOPONERROR; - } else { - goto encConvFromError; + bytesPtr = Tcl_GetString(objv[1]); + if (bytesPtr[0] == '-' && bytesPtr[1] == 'n' + && !strncmp(bytesPtr, "-nothrow", strlen(bytesPtr))) { + flags = TCL_ENCODING_NO_THROW; + } else if (bytesPtr[0] == '-' && bytesPtr[1] == 's' + && !strncmp(bytesPtr, "-stoponerror", strlen(bytesPtr))) { + flags = TCL_ENCODING_STOPONERROR; + } else if (objc < 4) { + if (Tcl_GetEncodingFromObj(interp, objv[objc - 2], &encoding) != TCL_OK) { + return TCL_ERROR; } + goto encConvFromOK; + } else { + goto encConvFromError; + } + if (objc < 4) { + encoding = Tcl_GetEncoding(interp, NULL); + } else if (Tcl_GetEncodingFromObj(interp, objv[objc - 2], &encoding) != TCL_OK) { + return TCL_ERROR; } } else { encConvFromError: @@ -583,16 +588,18 @@ EncodingConvertfromObjCmd( return TCL_ERROR; } +encConvFromOK: /* * Convert the string into a byte array in 'ds' */ - if (flags & TCL_ENCODING_STOPONERROR) { - bytesPtr = (char *) TclGetBytesFromObj(interp, data, &length); - if (bytesPtr == NULL) { - return TCL_ERROR; - } - } else { +#if !defined(TCL_NO_DEPRECATED) && (TCL_MAJOR_VERSION < 9) + if (!(flags & TCL_ENCODING_STOPONERROR)) { bytesPtr = (char *) Tcl_GetByteArrayFromObj(data, &length); + } else +#endif + bytesPtr = (char *) TclGetBytesFromObj(interp, data, &length); + if (bytesPtr == NULL) { + return TCL_ERROR; } result = Tcl_ExternalToUtfDStringEx(encoding, bytesPtr, length, flags, &ds); @@ -660,21 +667,26 @@ EncodingConverttoObjCmd( encoding = Tcl_GetEncoding(interp, NULL); data = objv[1]; } else if ((unsigned)(objc - 2) < 3) { - if (Tcl_GetEncodingFromObj(interp, objv[objc - 2], &encoding) != TCL_OK) { - return TCL_ERROR; - } data = objv[objc - 1]; - if (objc > 3) { - stringPtr = Tcl_GetString(objv[1]); - if (stringPtr[0] == '-' && stringPtr[1] == 'n' - && !strncmp(stringPtr, "-nothrow", strlen(stringPtr))) { - flags = TCL_ENCODING_NO_THROW; - } else if (stringPtr[0] == '-' && stringPtr[1] == 's' - && !strncmp(stringPtr, "-stoponerror", strlen(stringPtr))) { - flags = TCL_ENCODING_STOPONERROR; - } else { - goto encConvToError; + stringPtr = Tcl_GetString(objv[1]); + if (stringPtr[0] == '-' && stringPtr[1] == 'n' + && !strncmp(stringPtr, "-nothrow", strlen(stringPtr))) { + flags = TCL_ENCODING_NO_THROW; + } else if (stringPtr[0] == '-' && stringPtr[1] == 's' + && !strncmp(stringPtr, "-stoponerror", strlen(stringPtr))) { + flags = TCL_ENCODING_STOPONERROR; + } else if (objc < 4) { + if (Tcl_GetEncodingFromObj(interp, objv[objc - 2], &encoding) != TCL_OK) { + return TCL_ERROR; } + goto encConvToOK; + } else { + goto encConvToError; + } + if (objc < 4) { + encoding = Tcl_GetEncoding(interp, NULL); + } else if (Tcl_GetEncodingFromObj(interp, objv[objc - 2], &encoding) != TCL_OK) { + return TCL_ERROR; } } else { encConvToError: @@ -682,6 +694,7 @@ EncodingConverttoObjCmd( return TCL_ERROR; } +encConvToOK: /* * Convert the string to a byte array in 'ds' */ diff --git a/tests/encoding.test b/tests/encoding.test index d30ef60..55ace7f 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -627,6 +627,12 @@ test encoding-24.19 {Parse valid or invalid utf-8} -constraints testbytestring - test encoding-24.20 {Parse valid or invalid utf-8} -constraints testbytestring -body { encoding convertto -stoponerror utf-8 "ZX\uD800" } -returnCodes 1 -match glob -result "unexpected character at index 2: 'U+00D800'" +test encoding-24.21 {Parse with -nothrow but without providing encoding} { + string length [encoding convertfrom -nothrow "\x20"] +} 1 +test encoding-24.22 {Parse with -nothrow but without providing encoding} { + string length [encoding convertto -nothrow "\x20"] +} 1 file delete [file join [temporaryDirectory] iso2022.txt] # -- cgit v0.12 From 20fcf335945daf5dedf6f10f940026b681dd7f1b Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Wed, 26 May 2021 14:10:30 +0000 Subject: More testcases regarding possible parse errors --- tests/encoding.test | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/tests/encoding.test b/tests/encoding.test index 55ace7f..bdebad9 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -624,7 +624,7 @@ test encoding-24.18 {Parse valid or invalid utf-8} -constraints testbytestring - test encoding-24.19 {Parse valid or invalid utf-8} -constraints testbytestring -body { encoding convertto -stoponerror utf-8 [testbytestring "Z\xE0\x80xxxxxx"] } -result "Z\xC3\xA0\xE2\x82\xACxxxxxx" -test encoding-24.20 {Parse valid or invalid utf-8} -constraints testbytestring -body { +test encoding-24.20 {Parse valid or invalid utf-8} -body { encoding convertto -stoponerror utf-8 "ZX\uD800" } -returnCodes 1 -match glob -result "unexpected character at index 2: 'U+00D800'" test encoding-24.21 {Parse with -nothrow but without providing encoding} { @@ -633,6 +633,19 @@ test encoding-24.21 {Parse with -nothrow but without providing encoding} { test encoding-24.22 {Parse with -nothrow but without providing encoding} { string length [encoding convertto -nothrow "\x20"] } 1 +test encoding-24.23 {Syntax error, two encodings} -body { + encoding convertfrom iso8859-1 utf-8 "ZX\uD800" +} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertfrom ?-nothrow|-stoponerror? ?encoding? data"} +test encoding-24.24 {Syntax error, two encodings} -body { + encoding convertto iso8859-1 utf-8 "ZX\uD800" +} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertto ?-nothrow|-stoponerror? ?encoding? data"} +test encoding-24.25 {Syntax error, two options} -body { + encoding convertfrom -nothrow -stoponerror "ZX\uD800" +} -returnCodes 1 -result {unknown encoding "-stoponerror"} +test encoding-24.26 {Syntax error, two options} -body { + encoding convertto -nothrow -stoponerror "ZX\uD800" +} -returnCodes 1 -result {unknown encoding "-stoponerror"} + file delete [file join [temporaryDirectory] iso2022.txt] # -- cgit v0.12 From 6ec7e10a1634a0a9c10ed2cf90072ba723d701ce Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Sat, 5 Mar 2022 21:56:47 +0000 Subject: -nothrow -> -nocomplain --- doc/Encoding.3 | 2 +- generic/tcl.h | 4 ++-- generic/tclCmdAH.c | 22 ++++++++-------------- generic/tclEncoding.c | 6 +++--- tests/cmdAH.test | 4 ++-- tests/encoding.test | 52 +++++++++++++++++++++++++-------------------------- tests/safe.test | 8 ++++---- 7 files changed, 46 insertions(+), 52 deletions(-) diff --git a/doc/Encoding.3 b/doc/Encoding.3 index bffa0c3..dc37519 100644 --- a/doc/Encoding.3 +++ b/doc/Encoding.3 @@ -114,7 +114,7 @@ byte is converted and then to reset to an initial state. \fBTCL_ENCODING_STOPONERROR\fR signifies that the conversion routine should return immediately upon reading a source character that does not exist in the target encoding; otherwise a default fallback character will -automatically be substituted. The flag \fBTCL_ENCODING_NO_THROW\fR has +automatically be substituted. The flag \fBTCL_ENCODING_NOCOMPLAIN\fR has no effect, it is reserved for Tcl 9.0. The flag \fBTCL_ENCODING_MODIFIED\fR makes \fBTcl_UtfToExternalDStringEx\fR and \fBTcl_UtfToExternal\fR produce the byte sequence \exC0\ex80 in stead of \ex00, for the utf-8/cesu-8 encoders. diff --git a/generic/tcl.h b/generic/tcl.h index 783d576..ef0fa75 100644 --- a/generic/tcl.h +++ b/generic/tcl.h @@ -2081,7 +2081,7 @@ typedef struct Tcl_EncodingType { * 0x00. Only valid for "utf-8" and "cesu-8". * This flag is implicit for external -> internal conversions, * optional for internal -> external conversions. - * TCL_ENCODING_NO_THROW - If set, the converter + * TCL_ENCODING_NOCOMPLAIN - If set, the converter * substitutes the problematic character(s) with * one or more "close" characters in the * destination buffer and then continues to @@ -2097,7 +2097,7 @@ typedef struct Tcl_EncodingType { #define TCL_ENCODING_NO_TERMINATE 0x08 #define TCL_ENCODING_CHAR_LIMIT 0x10 #define TCL_ENCODING_MODIFIED 0x20 -#define TCL_ENCODING_NO_THROW 0x40 +#define TCL_ENCODING_NOCOMPLAIN 0x40 /* * The following definitions are the error codes returned by the conversion diff --git a/generic/tclCmdAH.c b/generic/tclCmdAH.c index 5b655ef..60a2c42 100644 --- a/generic/tclCmdAH.c +++ b/generic/tclCmdAH.c @@ -553,7 +553,7 @@ EncodingConvertfromObjCmd( #if TCL_MAJOR_VERSION > 8 || defined(TCL_NO_DEPRECATED) int flags = TCL_ENCODING_STOPONERROR; #else - int flags = TCL_ENCODING_NO_THROW; + int flags = TCL_ENCODING_NOCOMPLAIN; #endif size_t result; @@ -564,11 +564,8 @@ EncodingConvertfromObjCmd( data = objv[objc - 1]; bytesPtr = Tcl_GetString(objv[1]); if (bytesPtr[0] == '-' && bytesPtr[1] == 'n' - && !strncmp(bytesPtr, "-nothrow", strlen(bytesPtr))) { - flags = TCL_ENCODING_NO_THROW; - } else if (bytesPtr[0] == '-' && bytesPtr[1] == 's' - && !strncmp(bytesPtr, "-stoponerror", strlen(bytesPtr))) { - flags = TCL_ENCODING_STOPONERROR; + && !strncmp(bytesPtr, "-nocomplain", strlen(bytesPtr))) { + flags = TCL_ENCODING_NOCOMPLAIN; } else if (objc < 4) { if (Tcl_GetEncodingFromObj(interp, objv[objc - 2], &encoding) != TCL_OK) { return TCL_ERROR; @@ -584,7 +581,7 @@ EncodingConvertfromObjCmd( } } else { encConvFromError: - Tcl_WrongNumArgs(interp, 1, objv, "?-nothrow? ?encoding? data"); + Tcl_WrongNumArgs(interp, 1, objv, "?-nocomplain? ?encoding? data"); return TCL_ERROR; } @@ -660,7 +657,7 @@ EncodingConverttoObjCmd( #if TCL_MAJOR_VERSION > 8 || defined(TCL_NO_DEPRECATED) int flags = TCL_ENCODING_STOPONERROR; #else - int flags = TCL_ENCODING_NO_THROW; + int flags = TCL_ENCODING_NOCOMPLAIN; #endif if (objc == 2) { @@ -670,11 +667,8 @@ EncodingConverttoObjCmd( data = objv[objc - 1]; stringPtr = Tcl_GetString(objv[1]); if (stringPtr[0] == '-' && stringPtr[1] == 'n' - && !strncmp(stringPtr, "-nothrow", strlen(stringPtr))) { - flags = TCL_ENCODING_NO_THROW; - } else if (stringPtr[0] == '-' && stringPtr[1] == 's' - && !strncmp(stringPtr, "-stoponerror", strlen(stringPtr))) { - flags = TCL_ENCODING_STOPONERROR; + && !strncmp(stringPtr, "-nocomplain", strlen(stringPtr))) { + flags = TCL_ENCODING_NOCOMPLAIN; } else if (objc < 4) { if (Tcl_GetEncodingFromObj(interp, objv[objc - 2], &encoding) != TCL_OK) { return TCL_ERROR; @@ -690,7 +684,7 @@ EncodingConverttoObjCmd( } } else { encConvToError: - Tcl_WrongNumArgs(interp, 1, objv, "?-nothrow? ?encoding? data"); + Tcl_WrongNumArgs(interp, 1, objv, "?-nocomplain? ?encoding? data"); return TCL_ERROR; } diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index d1dbb09..b6d5dcf 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -1158,7 +1158,7 @@ Tcl_ExternalToUtfDString( * Possible flags values: * TCL_ENCODING_STOPONERROR: don't replace invalid characters/bytes but * return the first error position (Default in Tcl 9.0). - * TCL_ENCODING_NO_THROW: replace invalid characters/bytes by a default + * TCL_ENCODING_NOCOMPLAIN: replace invalid characters/bytes by a default * fallback character. Always return -1 (Default in Tcl 8.7). * TCL_ENCODING_MODIFIED: convert NULL bytes to \xC0\x80 in stead of 0x00. * Only valid for "utf-8" and "cesu-8". This flag may be used together @@ -1397,7 +1397,7 @@ Tcl_UtfToExternalDString( * Possible flags values: * TCL_ENCODING_STOPONERROR: don't replace invalid characters/bytes but * return the first error position (Default in Tcl 9.0). - * TCL_ENCODING_NO_THROW: replace invalid characters/bytes by a default + * TCL_ENCODING_NOCOMPLAIN: replace invalid characters/bytes by a default * fallback character. Always return -1 (Default in Tcl 8.7). * TCL_ENCODING_MODIFIED: convert NULL bytes to \xC0\x80 in stead of 0x00. * Only valid for "utf-8" and "cesu-8". This flag may be used together @@ -2288,7 +2288,7 @@ BinaryProc( */ #if TCL_MAJOR_VERSION > 8 || defined(TCL_NO_DEPRECATED) -# define STOPONERROR !(flags & TCL_ENCODING_NO_THROW) +# define STOPONERROR !(flags & TCL_ENCODING_NOCOMPLAIN) #else # define STOPONERROR (flags & TCL_ENCODING_STOPONERROR) #endif diff --git a/tests/cmdAH.test b/tests/cmdAH.test index 7f86275..d787c7f 100644 --- a/tests/cmdAH.test +++ b/tests/cmdAH.test @@ -178,7 +178,7 @@ test cmdAH-4.2 {Tcl_EncodingObjCmd} -returnCodes error -body { } -result {unknown or ambiguous subcommand "foo": must be convertfrom, convertto, dirs, names, or system} test cmdAH-4.3 {Tcl_EncodingObjCmd} -returnCodes error -body { encoding convertto -} -result {wrong # args: should be "encoding convertto ?-nothrow? ?encoding? data"} +} -result {wrong # args: should be "encoding convertto ?-nocomplain? ?encoding? data"} test cmdAH-4.4 {Tcl_EncodingObjCmd} -returnCodes error -body { encoding convertto foo bar } -result {unknown encoding "foo"} @@ -200,7 +200,7 @@ test cmdAH-4.6 {Tcl_EncodingObjCmd} -setup { } -result 8C test cmdAH-4.7 {Tcl_EncodingObjCmd} -returnCodes error -body { encoding convertfrom -} -result {wrong # args: should be "encoding convertfrom ?-nothrow? ?encoding? data"} +} -result {wrong # args: should be "encoding convertfrom ?-nocomplain? ?encoding? data"} test cmdAH-4.8 {Tcl_EncodingObjCmd} -returnCodes error -body { encoding convertfrom foo bar } -result {unknown encoding "foo"} diff --git a/tests/encoding.test b/tests/encoding.test index c6865d9..bf82493 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -299,7 +299,7 @@ test encoding-11.11 {encoding: extended Unicode UTF-32} { test encoding-12.1 {LoadTableEncoding: normal encoding} { set x [encoding convertto iso8859-3 Ġ] - append x [encoding convertto -nothrow iso8859-3 Õ] + append x [encoding convertto -nocomplain iso8859-3 Õ] append x [encoding convertfrom iso8859-3 Õ] } "Õ?Ġ" test encoding-12.2 {LoadTableEncoding: single-byte encoding} { @@ -348,67 +348,67 @@ test encoding-15.5 {UtfToUtfProc emoji character input} { } "4 😂" test encoding-15.6 {UtfToUtfProc emoji character output} { set x \uDE02\uD83D\uDE02\uD83D - set y [encoding convertto -nothrow utf-8 \uDE02\uD83D\uDE02\uD83D] + set y [encoding convertto -nocomplain utf-8 \uDE02\uD83D\uDE02\uD83D] binary scan $y H* z list [string length $y] $z } {10 edb882f09f9882eda0bd} test encoding-15.7 {UtfToUtfProc emoji character output} { set x \uDE02\uD83D\uD83D - set y [encoding convertto -nothrow utf-8 \uDE02\uD83D\uD83D] + set y [encoding convertto -nocomplain utf-8 \uDE02\uD83D\uD83D] binary scan $y H* z list [string length $x] [string length $y] $z } {3 9 edb882eda0bdeda0bd} test encoding-15.8 {UtfToUtfProc emoji character output} { set x \uDE02\uD83Dé - set y [encoding convertto -nothrow utf-8 \uDE02\uD83Dé] + set y [encoding convertto -nocomplain utf-8 \uDE02\uD83Dé] binary scan $y H* z list [string length $x] [string length $y] $z } {3 8 edb882eda0bdc3a9} test encoding-15.9 {UtfToUtfProc emoji character output} { set x \uDE02\uD83DX - set y [encoding convertto -nothrow utf-8 \uDE02\uD83DX] + set y [encoding convertto -nocomplain utf-8 \uDE02\uD83DX] binary scan $y H* z list [string length $x] [string length $y] $z } {3 7 edb882eda0bd58} test encoding-15.10 {UtfToUtfProc high surrogate character output} { set x \uDE02é - set y [encoding convertto -nothrow utf-8 \uDE02é] + set y [encoding convertto -nocomplain utf-8 \uDE02é] binary scan $y H* z list [string length $x] [string length $y] $z } {2 5 edb882c3a9} test encoding-15.11 {UtfToUtfProc low surrogate character output} { set x \uDA02é - set y [encoding convertto -nothrow utf-8 \uDA02é] + set y [encoding convertto -nocomplain utf-8 \uDA02é] binary scan $y H* z list [string length $x] [string length $y] $z } {2 5 eda882c3a9} test encoding-15.12 {UtfToUtfProc high surrogate character output} { set x \uDE02Y - set y [encoding convertto -nothrow utf-8 \uDE02Y] + set y [encoding convertto -nocomplain utf-8 \uDE02Y] binary scan $y H* z list [string length $x] [string length $y] $z } {2 4 edb88259} test encoding-15.13 {UtfToUtfProc low surrogate character output} { set x \uDA02Y - set y [encoding convertto -nothrow utf-8 \uDA02Y] + set y [encoding convertto -nocomplain utf-8 \uDA02Y] binary scan $y H* z list [string length $x] [string length $y] $z } {2 4 eda88259} test encoding-15.14 {UtfToUtfProc high surrogate character output} { set x \uDE02 - set y [encoding convertto -nothrow utf-8 \uDE02] + set y [encoding convertto -nocomplain utf-8 \uDE02] binary scan $y H* z list [string length $x] [string length $y] $z } {1 3 edb882} test encoding-15.15 {UtfToUtfProc low surrogate character output} { set x \uDA02 - set y [encoding convertto -nothrow utf-8 \uDA02] + set y [encoding convertto -nocomplain utf-8 \uDA02] binary scan $y H* z list [string length $x] [string length $y] $z } {1 3 eda882} test encoding-15.16 {UtfToUtfProc: Invalid 4-byte UTF-8, see [ed29806ba]} { set x \xF0\xA0\xA1\xC2 - set y [encoding convertfrom -nothrow utf-8 \xF0\xA0\xA1\xC2] + set y [encoding convertfrom -nocomplain utf-8 \xF0\xA0\xA1\xC2] list [string length $x] $y } "4 \xF0\xA0\xA1\xC2" test encoding-15.17 {UtfToUtfProc emoji character output} { @@ -489,10 +489,10 @@ test encoding-17.2 {UtfToUcs2Proc} -body { encoding convertfrom utf-16 [encoding convertto ucs-2 "\U460DC"] } -result "\uFFFD" test encoding-17.3 {UtfToUtf16Proc} -body { - encoding convertto -nothrow utf-16be "\uDCDC" + encoding convertto -nocomplain utf-16be "\uDCDC" } -result "\xFF\xFD" test encoding-17.4 {UtfToUtf16Proc} -body { - encoding convertto -nothrow utf-16le "\uD8D8" + encoding convertto -nocomplain utf-16le "\uD8D8" } -result "\xFD\xFF" test encoding-17.5 {UtfToUtf16Proc} -body { encoding convertto utf-32le "\U460DC" @@ -617,25 +617,25 @@ test encoding-24.4 {Parse valid or invalid utf-8} { string length [encoding convertfrom utf-8 "\xC0\x80"] } 1 test encoding-24.5 {Parse valid or invalid utf-8} { - string length [encoding convertfrom -nothrow utf-8 "\xC0\x81"] + string length [encoding convertfrom -nocomplain utf-8 "\xC0\x81"] } 2 test encoding-24.6 {Parse valid or invalid utf-8} { - string length [encoding convertfrom -nothrow utf-8 "\xC1\xBF"] + string length [encoding convertfrom -nocomplain utf-8 "\xC1\xBF"] } 2 test encoding-24.7 {Parse valid or invalid utf-8} { string length [encoding convertfrom utf-8 "\xC2\x80"] } 1 test encoding-24.8 {Parse valid or invalid utf-8} { - string length [encoding convertfrom -nothrow utf-8 "\xE0\x80\x80"] + string length [encoding convertfrom -nocomplain utf-8 "\xE0\x80\x80"] } 3 test encoding-24.9 {Parse valid or invalid utf-8} { - string length [encoding convertfrom -nothrow utf-8 "\xE0\x9F\xBF"] + string length [encoding convertfrom -nocomplain utf-8 "\xE0\x9F\xBF"] } 3 test encoding-24.10 {Parse valid or invalid utf-8} { string length [encoding convertfrom utf-8 "\xE0\xA0\x80"] } 1 test encoding-24.11 {Parse valid or invalid utf-8} { - string length [encoding convertfrom -nothrow utf-8 "\xEF\xBF\xBF"] + string length [encoding convertfrom -nocomplain utf-8 "\xEF\xBF\xBF"] } 1 test encoding-24.12 {Parse valid or invalid utf-8} -constraints deprecated -body { encoding convertfrom utf-8 "\xC0\x81" @@ -661,18 +661,18 @@ test encoding-24.18 {Parse valid or invalid utf-8} -constraints {testbytestring test encoding-24.19 {Parse valid or invalid utf-8} -constraints deprecated -body { encoding convertto utf-8 "ZX\uD800" } -returnCodes 1 -match glob -result "unexpected character at index 2: 'U+00D800'" -test encoding-24.20 {Parse with -nothrow but without providing encoding} { - string length [encoding convertfrom -nothrow "\x20"] +test encoding-24.20 {Parse with -nocomplain but without providing encoding} { + string length [encoding convertfrom -nocomplain "\x20"] } 1 -test encoding-24.21 {Parse with -nothrow but without providing encoding} { - string length [encoding convertto -nothrow "\x20"] +test encoding-24.21 {Parse with -nocomplain but without providing encoding} { + string length [encoding convertto -nocomplain "\x20"] } 1 test encoding-24.22 {Syntax error, two encodings} -body { encoding convertfrom iso8859-1 utf-8 "ZX\uD800" -} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertfrom ?-nothrow? ?encoding? data"} +} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertfrom ?-nocomplain? ?encoding? data"} test encoding-24.23 {Syntax error, two encodings} -body { encoding convertto iso8859-1 utf-8 "ZX\uD800" -} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertto ?-nothrow? ?encoding? data"} +} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertto ?-nocomplain? ?encoding? data"} file delete [file join [temporaryDirectory] iso2022.txt] @@ -828,7 +828,7 @@ test encoding-28.0 {all encodings load} -body { set string hello foreach name [encoding names] { incr count - encoding convertto -nothrow $name $string + encoding convertto -nocomplain $name $string # discard the cached internal representation of Tcl_Encoding # Unfortunately, without this, encoding 2-1 fails. diff --git a/tests/safe.test b/tests/safe.test index d5e2f00..5f3eae8 100644 --- a/tests/safe.test +++ b/tests/safe.test @@ -1269,7 +1269,7 @@ test safe-11.7 {testing safe encoding} -setup { interp eval $i encoding convertfrom } -returnCodes error -cleanup { safe::interpDelete $i -} -result {wrong # args: should be "encoding convertfrom ?-nothrow? ?encoding? data"} +} -result {wrong # args: should be "encoding convertfrom ?-nocomplain? ?encoding? data"} test safe-11.7.1 {testing safe encoding} -setup { set i [safe::interpCreate] } -body { @@ -1278,7 +1278,7 @@ test safe-11.7.1 {testing safe encoding} -setup { } -returnCodes ok -match glob -cleanup { unset -nocomplain m o safe::interpDelete $i -} -result {wrong # args: should be "encoding convertfrom ?-nothrow? ?encoding? data" +} -result {wrong # args: should be "encoding convertfrom ?-nocomplain? ?encoding? data" while executing "encoding convertfrom" invoked from within @@ -1291,7 +1291,7 @@ test safe-11.8 {testing safe encoding} -setup { interp eval $i encoding convertto } -returnCodes error -cleanup { safe::interpDelete $i -} -result {wrong # args: should be "encoding convertto ?-nothrow? ?encoding? data"} +} -result {wrong # args: should be "encoding convertto ?-nocomplain? ?encoding? data"} test safe-11.8.1 {testing safe encoding} -setup { set i [safe::interpCreate] } -body { @@ -1300,7 +1300,7 @@ test safe-11.8.1 {testing safe encoding} -setup { } -returnCodes ok -match glob -cleanup { unset -nocomplain m o safe::interpDelete $i -} -result {wrong # args: should be "encoding convertto ?-nothrow? ?encoding? data" +} -result {wrong # args: should be "encoding convertto ?-nocomplain? ?encoding? data" while executing "encoding convertto" invoked from within -- cgit v0.12