diff options
| author | jan.nijtmans <nijtmans@users.sourceforge.net> | 2022-03-17 13:44:04 (GMT) |
|---|---|---|
| committer | jan.nijtmans <nijtmans@users.sourceforge.net> | 2022-03-17 13:44:04 (GMT) |
| commit | 25473372703a8ba0a8bba93c36afad63b8a9e2f6 (patch) | |
| tree | 55a8af5f981022aa7202006bd5d7bb3a626e2182 | |
| parent | fb7684bbbf09e0c6e6328056be102e5069ab600f (diff) | |
| parent | 7056164cd356e6aed9a9290471a7029c35a606f5 (diff) | |
| download | tcl-25473372703a8ba0a8bba93c36afad63b8a9e2f6.zip tcl-25473372703a8ba0a8bba93c36afad63b8a9e2f6.tar.gz tcl-25473372703a8ba0a8bba93c36afad63b8a9e2f6.tar.bz2 | |
Merge 9.0
| -rw-r--r-- | doc/Encoding.3 | 21 | ||||
| -rw-r--r-- | generic/tcl.decls | 8 | ||||
| -rw-r--r-- | generic/tcl.h | 18 | ||||
| -rw-r--r-- | generic/tclCmdAH.c | 94 | ||||
| -rw-r--r-- | generic/tclDecls.h | 20 | ||||
| -rw-r--r-- | generic/tclEncoding.c | 136 | ||||
| -rw-r--r-- | generic/tclStubInit.c | 4 | ||||
| -rw-r--r-- | tests/chanio.test | 12 | ||||
| -rw-r--r-- | tests/cmdAH.test | 4 | ||||
| -rw-r--r-- | tests/encoding.test | 76 | ||||
| -rw-r--r-- | tests/http.test | 4 | ||||
| -rw-r--r-- | tests/io.test | 16 | ||||
| -rw-r--r-- | tests/main.test | 2 | ||||
| -rw-r--r-- | tests/safe.test | 8 | ||||
| -rw-r--r-- | tests/source.test | 4 | ||||
| -rw-r--r-- | win/rules.vc | 13 |
16 files changed, 344 insertions, 96 deletions
diff --git a/doc/Encoding.3 b/doc/Encoding.3 index c36744b..663cd3f 100644 --- a/doc/Encoding.3 +++ b/doc/Encoding.3 @@ -25,9 +25,15 @@ int char * \fBTcl_ExternalToUtfDString\fR(\fIencoding, src, srcLen, dstPtr\fR) .sp +size_t +\fBTcl_ExternalToUtfDStringEx\fR(\fIencoding, src, srcLen, flags, dstPtr\fR) +.sp char * \fBTcl_UtfToExternalDString\fR(\fIencoding, src, srcLen, dstPtr\fR) .sp +size_t +\fBTcl_UtfToExternalDStringEx\fR(\fIencoding, src, srcLen, flags, dstPtr\fR) +.sp int \fBTcl_ExternalToUtf\fR(\fIinterp, encoding, src, srcLen, flags, statePtr, dst, dstLen, srcReadPtr, dstWrotePtr, dstCharsPtr\fR) @@ -96,7 +102,10 @@ byte is converted and then to reset to an initial state. \fBTCL_ENCODING_STOPONERROR\fR signifies that the conversion routine should return immediately upon reading a source character that does not exist in the target encoding; otherwise a default fallback character will -automatically be substituted. +automatically be substituted. The flag \fBTCL_ENCODING_NOCOMPLAIN\fR has +no effect, it is reserved for Tcl 9.0. The flag \fBTCL_ENCODING_MODIFIED\fR makes +\fBTcl_UtfToExternalDStringEx\fR and \fBTcl_UtfToExternal\fR produce the +byte sequence \exC0\ex80 in stead of \ex00, for the utf-8/cesu-8 encoders. .AP Tcl_EncodingState *statePtr in/out Used when converting a (generally long or indefinite length) byte stream in a piece-by-piece fashion. The conversion routine stores its current @@ -196,6 +205,11 @@ When converting, if any of the characters in the source buffer cannot be represented in the target encoding, a default fallback character will be used. The return value is a pointer to the value stored in the DString. .PP +\fBTcl_ExternalToUtfDStringEx\fR is the same as \fBTcl_ExternalToUtfDString\fR, +but it has an additional flags parameter. The return value is the index of +the first byte in the input string causing a conversion error. +Or (size_t)-1 if all is OK. +.PP \fBTcl_ExternalToUtf\fR converts a source buffer \fIsrc\fR from the specified \fIencoding\fR into UTF-8. Up to \fIsrcLen\fR bytes are converted from the source buffer and up to \fIdstLen\fR converted bytes are stored in \fIdst\fR. @@ -234,6 +248,11 @@ characters in the source buffer cannot be represented in the target encoding, a default fallback character will be used. The return value is a pointer to the value stored in the DString. .PP +\fBTcl_UtfToExternalDStringEx\fR is the same as \fBTcl_UtfToExternalDString\fR, +but it has an additional flags parameter. The return value is the index of +the first byte of an utf-8 byte-sequence in the input string causing a +conversion error. Or (size_t)-1 if all is OK. +.PP \fBTcl_UtfToExternal\fR converts a source buffer \fIsrc\fR from UTF-8 into the specified \fIencoding\fR. Up to \fIsrcLen\fR bytes are converted from the source buffer and up to \fIdstLen\fR converted bytes are stored in diff --git a/generic/tcl.decls b/generic/tcl.decls index 7d035c5..c2bbf56 100644 --- a/generic/tcl.decls +++ b/generic/tcl.decls @@ -2498,6 +2498,14 @@ declare 656 { declare 657 { int Tcl_UniCharIsUnicode(int ch) } +declare 658 { + size_t Tcl_ExternalToUtfDStringEx(Tcl_Encoding encoding, + const char *src, size_t srcLen, int flags, Tcl_DString *dsPtr) +} +declare 659 { + size_t Tcl_UtfToExternalDStringEx(Tcl_Encoding encoding, + const char *src, size_t srcLen, int flags, Tcl_DString *dsPtr) +} # TIP #511 declare 660 { diff --git a/generic/tcl.h b/generic/tcl.h index a4dec97..2757eff 100644 --- a/generic/tcl.h +++ b/generic/tcl.h @@ -1869,10 +1869,10 @@ typedef struct Tcl_EncodingType { * encountering an invalid byte sequence or a * source character that has no mapping in the * target encoding. If clear, the converter - * substitues the problematic character(s) with + * substitutes the problematic character(s) with * one or more "close" characters in the * destination buffer and then continues to - * convert the source. + * convert the source. Only for Tcl 8.x. * TCL_ENCODING_NO_TERMINATE - If set, Tcl_ExternalToUtf does not append a * terminating NUL byte. Since it does not need * an extra byte for a terminating NUL, it fills @@ -1887,6 +1887,18 @@ typedef struct Tcl_EncodingType { * content. Otherwise, the number of chars * produced is controlled only by other limiting * factors. + * TCL_ENCODING_MODIFIED - Convert NULL bytes to \xC0\x80 in stead of + * 0x00. Only valid for "utf-8" and "cesu-8". + * This flag is implicit for external -> internal conversions, + * optional for internal -> external conversions. + * TCL_ENCODING_NOCOMPLAIN - If set, the converter + * substitutes the problematic character(s) with + * one or more "close" characters in the + * destination buffer and then continues to + * convert the source. If clear, the converter returns + * immediately upon encountering an invalid byte sequence + * or a source character that has no mapping in the + * target encoding. Only for Tcl 9.x. */ #define TCL_ENCODING_START 0x01 @@ -1894,6 +1906,8 @@ typedef struct Tcl_EncodingType { #define TCL_ENCODING_STOPONERROR 0x04 #define TCL_ENCODING_NO_TERMINATE 0x08 #define TCL_ENCODING_CHAR_LIMIT 0x10 +#define TCL_ENCODING_MODIFIED 0x20 +#define TCL_ENCODING_NOCOMPLAIN 0x40 /* * The following definitions are the error codes returned by the conversion diff --git a/generic/tclCmdAH.c b/generic/tclCmdAH.c index d58b92c..36d9867 100644 --- a/generic/tclCmdAH.c +++ b/generic/tclCmdAH.c @@ -377,8 +377,8 @@ TclInitEncodingCmd( Tcl_Interp* interp) /* Tcl interpreter */ { static const EnsembleImplMap encodingImplMap[] = { - {"convertfrom", EncodingConvertfromObjCmd, TclCompileBasic1Or2ArgCmd, NULL, NULL, 0}, - {"convertto", EncodingConverttoObjCmd, TclCompileBasic1Or2ArgCmd, NULL, NULL, 0}, + {"convertfrom", EncodingConvertfromObjCmd, TclCompileBasic1To3ArgCmd, NULL, NULL, 0}, + {"convertto", EncodingConverttoObjCmd, TclCompileBasic1To3ArgCmd, NULL, NULL, 0}, {"dirs", EncodingDirsObjCmd, TclCompileBasic0Or1ArgCmd, NULL, NULL, 1}, {"names", EncodingNamesObjCmd, TclCompileBasic0ArgCmd, NULL, NULL, 0}, {"system", EncodingSystemObjCmd, TclCompileBasic0Or1ArgCmd, NULL, NULL, 1}, @@ -414,29 +414,61 @@ EncodingConvertfromObjCmd( Tcl_Encoding encoding; /* Encoding to use */ size_t length = 0; /* Length of the byte array being converted */ const char *bytesPtr; /* Pointer to the first byte of the array */ +#if TCL_MAJOR_VERSION > 8 || defined(TCL_NO_DEPRECATED) + int flags = TCL_ENCODING_STOPONERROR; +#else + int flags = TCL_ENCODING_NOCOMPLAIN; +#endif + size_t result; if (objc == 2) { encoding = Tcl_GetEncoding(interp, NULL); data = objv[1]; - } else if (objc == 3) { - if (Tcl_GetEncodingFromObj(interp, objv[1], &encoding) != TCL_OK) { + } else if ((unsigned)(objc - 2) < 3) { + data = objv[objc - 1]; + bytesPtr = Tcl_GetString(objv[1]); + if (bytesPtr[0] == '-' && bytesPtr[1] == 'n' + && !strncmp(bytesPtr, "-nocomplain", strlen(bytesPtr))) { + flags = TCL_ENCODING_NOCOMPLAIN; + } else if (objc < 4) { + if (Tcl_GetEncodingFromObj(interp, objv[objc - 2], &encoding) != TCL_OK) { + return TCL_ERROR; + } + goto encConvFromOK; + } else { + goto encConvFromError; + } + if (objc < 4) { + encoding = Tcl_GetEncoding(interp, NULL); + } else if (Tcl_GetEncodingFromObj(interp, objv[objc - 2], &encoding) != TCL_OK) { return TCL_ERROR; } - data = objv[2]; } else { - Tcl_WrongNumArgs(interp, 1, objv, "?encoding? data"); + encConvFromError: + Tcl_WrongNumArgs(interp, 1, objv, "?-nocomplain? ?encoding? data"); return TCL_ERROR; } +encConvFromOK: /* * Convert the string into a byte array in 'ds' */ bytesPtr = (char *) Tcl_GetBytesFromObj(interp, data, &length); if (bytesPtr == NULL) { - Tcl_FreeEncoding(encoding); return TCL_ERROR; } - Tcl_ExternalToUtfDString(encoding, bytesPtr, length, &ds); + result = Tcl_ExternalToUtfDStringEx(encoding, bytesPtr, length, + flags, &ds); + if ((flags & TCL_ENCODING_STOPONERROR) && (result != TCL_INDEX_NONE)) { + char buf[TCL_INTEGER_SPACE]; + sprintf(buf, "%" TCL_Z_MODIFIER "u", result); + Tcl_SetObjResult(interp, Tcl_ObjPrintf("unexpected byte sequence starting at index %" + TCL_Z_MODIFIER "u: '\\x%X'", result, UCHAR(bytesPtr[result]))); + Tcl_SetErrorCode(interp, "TCL", "ENCODING", "ILLEGALSEQUENCE", + buf, NULL); + Tcl_DStringFree(&ds); + return TCL_ERROR; + } /* * Note that we cannot use Tcl_DStringResult here because it will @@ -480,26 +512,62 @@ EncodingConverttoObjCmd( Tcl_Encoding encoding; /* Encoding to use */ size_t length; /* Length of the string being converted */ const char *stringPtr; /* Pointer to the first byte of the string */ + size_t result; +#if TCL_MAJOR_VERSION > 8 || defined(TCL_NO_DEPRECATED) + int flags = TCL_ENCODING_STOPONERROR; +#else + int flags = TCL_ENCODING_NOCOMPLAIN; +#endif if (objc == 2) { encoding = Tcl_GetEncoding(interp, NULL); data = objv[1]; - } else if (objc == 3) { - if (Tcl_GetEncodingFromObj(interp, objv[1], &encoding) != TCL_OK) { + } else if ((unsigned)(objc - 2) < 3) { + data = objv[objc - 1]; + stringPtr = Tcl_GetString(objv[1]); + if (stringPtr[0] == '-' && stringPtr[1] == 'n' + && !strncmp(stringPtr, "-nocomplain", strlen(stringPtr))) { + flags = TCL_ENCODING_NOCOMPLAIN; + } else if (objc < 4) { + if (Tcl_GetEncodingFromObj(interp, objv[objc - 2], &encoding) != TCL_OK) { + return TCL_ERROR; + } + goto encConvToOK; + } else { + goto encConvToError; + } + if (objc < 4) { + encoding = Tcl_GetEncoding(interp, NULL); + } else if (Tcl_GetEncodingFromObj(interp, objv[objc - 2], &encoding) != TCL_OK) { return TCL_ERROR; } - data = objv[2]; } else { - Tcl_WrongNumArgs(interp, 1, objv, "?encoding? data"); + encConvToError: + Tcl_WrongNumArgs(interp, 1, objv, "?-nocomplain? ?encoding? data"); return TCL_ERROR; } +encConvToOK: /* * Convert the string to a byte array in 'ds' */ stringPtr = Tcl_GetStringFromObj(data, &length); - Tcl_UtfToExternalDString(encoding, stringPtr, length, &ds); + result = Tcl_UtfToExternalDStringEx(encoding, stringPtr, length, + flags, &ds); + if ((flags & TCL_ENCODING_STOPONERROR) && (result != TCL_INDEX_NONE)) { + size_t pos = Tcl_NumUtfChars(stringPtr, result); + int ucs4; + char buf[TCL_INTEGER_SPACE]; + TclUtfToUCS4(&stringPtr[result], &ucs4); + sprintf(buf, "%" TCL_Z_MODIFIER "u", result); + Tcl_SetObjResult(interp, Tcl_ObjPrintf("unexpected character at index %" + TCL_Z_MODIFIER "u: 'U+%06X'", pos, ucs4)); + Tcl_SetErrorCode(interp, "TCL", "ENCODING", "ILLEGALSEQUENCE", + buf, NULL); + Tcl_DStringFree(&ds); + return TCL_ERROR; + } Tcl_SetObjResult(interp, Tcl_NewByteArrayObj((unsigned char*) Tcl_DStringValue(&ds), Tcl_DStringLength(&ds))); diff --git a/generic/tclDecls.h b/generic/tclDecls.h index a19f781..71a4599 100644 --- a/generic/tclDecls.h +++ b/generic/tclDecls.h @@ -1756,8 +1756,14 @@ EXTERN const char * Tcl_UtfNext(const char *src); EXTERN const char * Tcl_UtfPrev(const char *src, const char *start); /* 657 */ EXTERN int Tcl_UniCharIsUnicode(int ch); -/* Slot 658 is reserved */ -/* Slot 659 is reserved */ +/* 658 */ +EXTERN size_t Tcl_ExternalToUtfDStringEx(Tcl_Encoding encoding, + const char *src, size_t srcLen, int flags, + Tcl_DString *dsPtr); +/* 659 */ +EXTERN size_t Tcl_UtfToExternalDStringEx(Tcl_Encoding encoding, + const char *src, size_t srcLen, int flags, + Tcl_DString *dsPtr); /* 660 */ EXTERN int Tcl_AsyncMarkFromSignal(Tcl_AsyncHandler async, int sigNumber); @@ -2456,8 +2462,8 @@ typedef struct TclStubs { const char * (*tcl_UtfNext) (const char *src); /* 655 */ const char * (*tcl_UtfPrev) (const char *src, const char *start); /* 656 */ int (*tcl_UniCharIsUnicode) (int ch); /* 657 */ - void (*reserved658)(void); - void (*reserved659)(void); + size_t (*tcl_ExternalToUtfDStringEx) (Tcl_Encoding encoding, const char *src, size_t srcLen, int flags, Tcl_DString *dsPtr); /* 658 */ + size_t (*tcl_UtfToExternalDStringEx) (Tcl_Encoding encoding, const char *src, size_t srcLen, int flags, Tcl_DString *dsPtr); /* 659 */ int (*tcl_AsyncMarkFromSignal) (Tcl_AsyncHandler async, int sigNumber); /* 660 */ int (*tcl_ListObjGetElements) (Tcl_Interp *interp, Tcl_Obj *listPtr, size_t *objcPtr, Tcl_Obj ***objvPtr); /* 661 */ int (*tcl_ListObjLength) (Tcl_Interp *interp, Tcl_Obj *listPtr, size_t *lengthPtr); /* 662 */ @@ -3734,8 +3740,10 @@ extern const TclStubs *tclStubsPtr; (tclStubsPtr->tcl_UtfPrev) /* 656 */ #define Tcl_UniCharIsUnicode \ (tclStubsPtr->tcl_UniCharIsUnicode) /* 657 */ -/* Slot 658 is reserved */ -/* Slot 659 is reserved */ +#define Tcl_ExternalToUtfDStringEx \ + (tclStubsPtr->tcl_ExternalToUtfDStringEx) /* 658 */ +#define Tcl_UtfToExternalDStringEx \ + (tclStubsPtr->tcl_UtfToExternalDStringEx) /* 659 */ #define Tcl_AsyncMarkFromSignal \ (tclStubsPtr->tcl_AsyncMarkFromSignal) /* 660 */ #define Tcl_ListObjGetElements \ diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index b637ce1..7b77282 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -515,10 +515,8 @@ FillEncodingFileMap(void) *--------------------------------------------------------------------------- */ -/* Those flags must not conflict with other TCL_ENCODING_* flags in tcl.h */ /* Since TCL_ENCODING_MODIFIED is only used for utf-8/cesu-8 and * TCL_ENCODING_LE is only used for utf-16/utf-32/ucs-2. re-use the same value */ -#define TCL_ENCODING_MODIFIED 0x20 /* Converting NULL bytes to 0xC0 0x80 */ #define TCL_ENCODING_LE TCL_ENCODING_MODIFIED /* Little-endian encoding */ #define TCL_ENCODING_UTF 0x200 /* For UTF-8 encoding, allow 4-byte output sequences */ @@ -1080,11 +1078,57 @@ Tcl_ExternalToUtfDString( Tcl_DString *dstPtr) /* Uninitialized or free DString in which the * converted string is stored. */ { + Tcl_ExternalToUtfDStringEx(encoding, src, srcLen, 0, dstPtr); + return Tcl_DStringValue(dstPtr); +} + + +/* + *------------------------------------------------------------------------- + * + * Tcl_ExternalToUtfDStringEx -- + * + * Convert a source buffer from the specified encoding into UTF-8. +* The parameter flags controls the behavior, if any of the bytes in + * the source buffer are invalid or cannot be represented in utf-8. + * Possible flags values: + * TCL_ENCODING_STOPONERROR: don't replace invalid characters/bytes but + * return the first error position (Default in Tcl 9.0). + * TCL_ENCODING_NOCOMPLAIN: replace invalid characters/bytes by a default + * fallback character. Always return -1 (Default in Tcl 8.7). + * TCL_ENCODING_MODIFIED: convert NULL bytes to \xC0\x80 in stead of 0x00. + * Only valid for "utf-8" and "cesu-8". This flag may be used together + * with the other flags. + * + * Results: + * The converted bytes are stored in the DString, which is then NULL + * terminated in an encoding-specific manner. The return value is + * the error position in the source string or -1 if no conversion error + * is reported. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------- + */ + +size_t +Tcl_ExternalToUtfDStringEx( + Tcl_Encoding encoding, /* The encoding for the source string, or NULL + * for the default system encoding. */ + const char *src, /* Source string in specified encoding. */ + size_t srcLen, /* Source string length in bytes, or TCL_INDEX_NONE for + * encoding-specific string length. */ + int flags, /* Conversion control flags. */ + Tcl_DString *dstPtr) /* Uninitialized or free DString in which the + * converted string is stored. */ +{ char *dst; Tcl_EncodingState state; const Encoding *encodingPtr; - int flags, result, soFar, srcRead, dstWrote, dstChars; + int result, soFar, srcRead, dstWrote, dstChars; size_t dstLen; + const char *srcStart = src; Tcl_DStringInit(dstPtr); dst = Tcl_DStringValue(dstPtr); @@ -1101,7 +1145,7 @@ Tcl_ExternalToUtfDString( srcLen = encodingPtr->lengthProc(src); } - flags = TCL_ENCODING_START | TCL_ENCODING_END; + flags |= TCL_ENCODING_START | TCL_ENCODING_END; if (encodingPtr->toUtfProc == UtfToUtfProc) { flags |= TCL_ENCODING_MODIFIED | TCL_ENCODING_UTF; } @@ -1114,7 +1158,7 @@ Tcl_ExternalToUtfDString( src += srcRead; if (result != TCL_CONVERT_NOSPACE) { Tcl_DStringSetLength(dstPtr, soFar); - return Tcl_DStringValue(dstPtr); + return (result == TCL_OK) ? TCL_INDEX_NONE : (size_t)(src - srcStart); } flags &= ~TCL_ENCODING_START; srcLen -= srcRead; @@ -1273,10 +1317,57 @@ Tcl_UtfToExternalDString( Tcl_DString *dstPtr) /* Uninitialized or free DString in which the * converted string is stored. */ { + Tcl_UtfToExternalDStringEx(encoding, src, srcLen, 0, dstPtr); + return Tcl_DStringValue(dstPtr); +} + + +/* + *------------------------------------------------------------------------- + * + * Tcl_UtfToExternalDStringEx -- + * + * Convert a source buffer from UTF-8 to the specified encoding. + * The parameter flags controls the behavior, if any of the bytes in + * the source buffer are invalid or cannot be represented in the + * target encoding. + * Possible flags values: + * TCL_ENCODING_STOPONERROR: don't replace invalid characters/bytes but + * return the first error position (Default in Tcl 9.0). + * TCL_ENCODING_NOCOMPLAIN: replace invalid characters/bytes by a default + * fallback character. Always return -1 (Default in Tcl 8.7). + * TCL_ENCODING_MODIFIED: convert NULL bytes to \xC0\x80 in stead of 0x00. + * Only valid for "utf-8" and "cesu-8". This flag may be used together + * with the other flags. + * + * Results: + * The converted bytes are stored in the DString, which is then NULL + * terminated in an encoding-specific manner. The return value is + * the error position in the source string or -1 if no conversion error + * is reported. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------- + */ + +size_t +Tcl_UtfToExternalDStringEx( + Tcl_Encoding encoding, /* The encoding for the converted string, or + * NULL for the default system encoding. */ + const char *src, /* Source string in UTF-8. */ + size_t srcLen, /* Source string length in bytes, or < 0 for + * strlen(). */ + int flags, /* Conversion control flags. */ + Tcl_DString *dstPtr) /* Uninitialized or free DString in which the + * converted string is stored. */ +{ char *dst; Tcl_EncodingState state; const Encoding *encodingPtr; - int flags, result, soFar, srcRead, dstWrote, dstChars; + int result, soFar, srcRead, dstWrote, dstChars; + const char *srcStart = src; size_t dstLen; Tcl_DStringInit(dstPtr); @@ -1293,7 +1384,7 @@ Tcl_UtfToExternalDString( } else if (srcLen == TCL_INDEX_NONE) { srcLen = strlen(src); } - flags = TCL_ENCODING_START | TCL_ENCODING_END; + flags |= TCL_ENCODING_START | TCL_ENCODING_END; while (1) { result = encodingPtr->fromUtfProc(encodingPtr->clientData, src, srcLen, flags, &state, dst, dstLen, @@ -1306,7 +1397,7 @@ Tcl_UtfToExternalDString( while (i >= soFar) { Tcl_DStringSetLength(dstPtr, i--); } - return Tcl_DStringValue(dstPtr); + return (result == TCL_OK) ? TCL_INDEX_NONE : (size_t)(src - srcStart); } flags &= ~TCL_ENCODING_START; @@ -2134,6 +2225,12 @@ BinaryProc( *------------------------------------------------------------------------- */ +#if TCL_MAJOR_VERSION > 8 || defined(TCL_NO_DEPRECATED) +# define STOPONERROR !(flags & TCL_ENCODING_NOCOMPLAIN) +#else +# define STOPONERROR (flags & TCL_ENCODING_STOPONERROR) +#endif + static int UtfToUtfProc( ClientData clientData, /* additional flags, e.g. TCL_ENCODING_MODIFIED */ @@ -2216,7 +2313,7 @@ UtfToUtfProc( */ if (flags & TCL_ENCODING_MODIFIED) { - if (flags & TCL_ENCODING_STOPONERROR) { + if (STOPONERROR) { result = TCL_CONVERT_MULTIBYTE; break; } @@ -2231,7 +2328,7 @@ UtfToUtfProc( int low; const char *saveSrc = src; size_t len = TclUtfToUCS4(src, &ch); - if ((len < 2) && (ch != 0) && (flags & TCL_ENCODING_STOPONERROR) + if ((len < 2) && (ch != 0) && STOPONERROR && (flags & TCL_ENCODING_MODIFIED)) { result = TCL_CONVERT_SYNTAX; break; @@ -2256,7 +2353,8 @@ UtfToUtfProc( len = (src <= srcEnd-3) ? TclUtfToUCS4(src, &low) : 0; if (((low & ~0x3FF) != 0xDC00) || (ch & 0x400)) { - if (flags & TCL_ENCODING_STOPONERROR) { + + if (STOPONERROR) { result = TCL_CONVERT_UNKNOWN; src = saveSrc; break; @@ -2271,7 +2369,7 @@ UtfToUtfProc( dst += Tcl_UniCharToUtf(ch, dst); ch = low; } else if (!Tcl_UniCharIsUnicode(ch)) { - if (flags & TCL_ENCODING_STOPONERROR) { + if (STOPONERROR) { result = TCL_CONVERT_UNKNOWN; src = saveSrc; break; @@ -2457,7 +2555,7 @@ UtfToUtf32Proc( } len = TclUtfToUCS4(src, &ch); if (!Tcl_UniCharIsUnicode(ch)) { - if (flags & TCL_ENCODING_STOPONERROR) { + if (STOPONERROR) { result = TCL_CONVERT_UNKNOWN; break; } @@ -2660,7 +2758,7 @@ UtfToUtf16Proc( } len = TclUtfToUCS4(src, &ch); if (!Tcl_UniCharIsUnicode(ch)) { - if (flags & TCL_ENCODING_STOPONERROR) { + if (STOPONERROR) { result = TCL_CONVERT_UNKNOWN; break; } @@ -2880,7 +2978,7 @@ TableToUtfProc( ch = pageZero[byte]; } if ((ch == 0) && (byte != 0)) { - if (flags & TCL_ENCODING_STOPONERROR) { + if (STOPONERROR) { result = TCL_CONVERT_SYNTAX; break; } @@ -2996,7 +3094,7 @@ TableFromUtfProc( word = fromUnicode[(ch >> 8)][ch & 0xFF]; if ((word == 0) && (ch != 0)) { - if (flags & TCL_ENCODING_STOPONERROR) { + if (STOPONERROR) { result = TCL_CONVERT_UNKNOWN; break; } @@ -3184,7 +3282,7 @@ Iso88591FromUtfProc( || ((ch >= 0xD800) && (len < 3)) #endif ) { - if (flags & TCL_ENCODING_STOPONERROR) { + if (STOPONERROR) { result = TCL_CONVERT_UNKNOWN; break; } @@ -3411,7 +3509,7 @@ EscapeToUtfProc( if ((checked == dataPtr->numSubTables + 2) || (flags & TCL_ENCODING_END)) { - if ((flags & TCL_ENCODING_STOPONERROR) == 0) { + if (!STOPONERROR) { /* * Skip the unknown escape sequence. */ @@ -3586,7 +3684,7 @@ EscapeFromUtfProc( if (word == 0) { state = oldState; - if (flags & TCL_ENCODING_STOPONERROR) { + if (STOPONERROR) { result = TCL_CONVERT_UNKNOWN; break; } diff --git a/generic/tclStubInit.c b/generic/tclStubInit.c index 19cf8c1..a75ef74 100644 --- a/generic/tclStubInit.c +++ b/generic/tclStubInit.c @@ -1451,8 +1451,8 @@ const TclStubs tclStubs = { Tcl_UtfNext, /* 655 */ Tcl_UtfPrev, /* 656 */ Tcl_UniCharIsUnicode, /* 657 */ - 0, /* 658 */ - 0, /* 659 */ + Tcl_ExternalToUtfDStringEx, /* 658 */ + Tcl_UtfToExternalDStringEx, /* 659 */ Tcl_AsyncMarkFromSignal, /* 660 */ Tcl_ListObjGetElements, /* 661 */ Tcl_ListObjLength, /* 662 */ diff --git a/tests/chanio.test b/tests/chanio.test index 2d26ac9..11a4e74 100644 --- a/tests/chanio.test +++ b/tests/chanio.test @@ -249,7 +249,7 @@ test chan-io-3.3 {WriteChars: compatibility with WriteBytes: flush on line} -bod } -cleanup { chan close $f } -result "\r\n12" -test chan-io-3.4 {WriteChars: loop over stage buffer} { +test chan-io-3.4 {WriteChars: loop over stage buffer} -body { # stage buffer maps to more than can be queued at once. set f [open $path(test1) w] chan configure $f -encoding jis0208 -buffersize 16 @@ -257,8 +257,8 @@ test chan-io-3.4 {WriteChars: loop over stage buffer} { set x [list [contents $path(test1)]] chan close $f lappend x [contents $path(test1)] -} [list "!)!)!)!)!)!)!)!)" "!)!)!)!)!)!)!)!)!)!)!)!)!)!)!)"] -test chan-io-3.5 {WriteChars: saved != 0} { +} -errorCode {POSIX EILSEQ {illegal byte sequence}} -match glob -result {error writing "*": illegal byte sequence} +test chan-io-3.5 {WriteChars: saved != 0} -body { # Bytes produced by UtfToExternal from end of last channel buffer had to # be moved to beginning of next channel buffer to preserve requested # buffersize. @@ -268,7 +268,7 @@ test chan-io-3.5 {WriteChars: saved != 0} { set x [list [contents $path(test1)]] chan close $f lappend x [contents $path(test1)] -} [list "!)!)!)!)!)!)!)!)!" "!)!)!)!)!)!)!)!)!)!)!)!)!)!)!)"] +} -errorCode {POSIX EILSEQ {illegal byte sequence}} -match glob -result {error writing "*": illegal byte sequence} test chan-io-3.6 {WriteChars: (stageRead + dstWrote == 0)} { # One incomplete UTF-8 character at end of staging buffer. Backup in src # to the beginning of that UTF-8 character and try again. @@ -285,7 +285,7 @@ test chan-io-3.6 {WriteChars: (stageRead + dstWrote == 0)} { chan close $f lappend x [contents $path(test1)] } [list "12345678901234\x82\x60" "12345678901234\x82\x60\x82\x61"] -test chan-io-3.7 {WriteChars: (bufPtr->nextAdded > bufPtr->length)} { +test chan-io-3.7 {WriteChars: (bufPtr->nextAdded > bufPtr->length)} -body { # When translating UTF-8 to external, the produced bytes went past end of # the channel buffer. This is done on purpose - we then truncate the bytes # at the end of the partial character to preserve the requested blocksize @@ -297,7 +297,7 @@ test chan-io-3.7 {WriteChars: (bufPtr->nextAdded > bufPtr->length)} { set x [list [contents $path(test1)]] chan close $f lappend x [contents $path(test1)] -} [list "!)!)!)!)!)!)!)!)!" "!)!)!)!)!)!)!)!)!)!)!)!)!)!)!)"] +} -errorCode {POSIX EILSEQ {illegal byte sequence}} -match glob -result {error writing "*": illegal byte sequence} test chan-io-3.8 {WriteChars: reset sawLF after each buffer} { set f [open $path(test1) w] chan configure $f -encoding ascii -buffering line -translation lf \ diff --git a/tests/cmdAH.test b/tests/cmdAH.test index ba2a97c..d3e46cf 100644 --- a/tests/cmdAH.test +++ b/tests/cmdAH.test @@ -172,7 +172,7 @@ test cmdAH-4.2 {Tcl_EncodingObjCmd} -returnCodes error -body { } -result {unknown or ambiguous subcommand "foo": must be convertfrom, convertto, dirs, names, or system} test cmdAH-4.3 {Tcl_EncodingObjCmd} -returnCodes error -body { encoding convertto -} -result {wrong # args: should be "encoding convertto ?encoding? data"} +} -result {wrong # args: should be "encoding convertto ?-nocomplain? ?encoding? data"} test cmdAH-4.4 {Tcl_EncodingObjCmd} -returnCodes error -body { encoding convertto foo bar } -result {unknown encoding "foo"} @@ -194,7 +194,7 @@ test cmdAH-4.6 {Tcl_EncodingObjCmd} -setup { } -result 8C test cmdAH-4.7 {Tcl_EncodingObjCmd} -returnCodes error -body { encoding convertfrom -} -result {wrong # args: should be "encoding convertfrom ?encoding? data"} +} -result {wrong # args: should be "encoding convertfrom ?-nocomplain? ?encoding? data"} test cmdAH-4.8 {Tcl_EncodingObjCmd} -returnCodes error -body { encoding convertfrom foo bar } -result {unknown encoding "foo"} diff --git a/tests/encoding.test b/tests/encoding.test index c99dc71..fffcdd5 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -297,7 +297,7 @@ test encoding-11.11 {encoding: extended Unicode UTF-32} { test encoding-12.1 {LoadTableEncoding: normal encoding} { set x [encoding convertto iso8859-3 Ġ] - append x [encoding convertto iso8859-3 Õ] + append x [encoding convertto -nocomplain iso8859-3 Õ] append x [encoding convertfrom iso8859-3 Õ] } "Õ?Ġ" test encoding-12.2 {LoadTableEncoding: single-byte encoding} { @@ -346,67 +346,67 @@ test encoding-15.5 {UtfToUtfProc emoji character input} { } "4 😂" test encoding-15.6 {UtfToUtfProc emoji character output} { set x \uDE02\uD83D\uDE02\uD83D - set y [encoding convertto utf-8 \uDE02\uD83D\uDE02\uD83D] + set y [encoding convertto -nocomplain utf-8 \uDE02\uD83D\uDE02\uD83D] binary scan $y H* z list [string length $y] $z } {10 edb882f09f9882eda0bd} test encoding-15.7 {UtfToUtfProc emoji character output} { set x \uDE02\uD83D\uD83D - set y [encoding convertto utf-8 \uDE02\uD83D\uD83D] + set y [encoding convertto -nocomplain utf-8 \uDE02\uD83D\uD83D] binary scan $y H* z list [string length $x] [string length $y] $z } {3 9 edb882eda0bdeda0bd} test encoding-15.8 {UtfToUtfProc emoji character output} { set x \uDE02\uD83Dé - set y [encoding convertto utf-8 \uDE02\uD83Dé] + set y [encoding convertto -nocomplain utf-8 \uDE02\uD83Dé] binary scan $y H* z list [string length $x] [string length $y] $z } {3 8 edb882eda0bdc3a9} test encoding-15.9 {UtfToUtfProc emoji character output} { set x \uDE02\uD83DX - set y [encoding convertto utf-8 \uDE02\uD83DX] + set y [encoding convertto -nocomplain utf-8 \uDE02\uD83DX] binary scan $y H* z list [string length $x] [string length $y] $z } {3 7 edb882eda0bd58} test encoding-15.10 {UtfToUtfProc high surrogate character output} { set x \uDE02é - set y [encoding convertto utf-8 \uDE02é] + set y [encoding convertto -nocomplain utf-8 \uDE02é] binary scan $y H* z list [string length $x] [string length $y] $z } {2 5 edb882c3a9} test encoding-15.11 {UtfToUtfProc low surrogate character output} { set x \uDA02é - set y [encoding convertto utf-8 \uDA02é] + set y [encoding convertto -nocomplain utf-8 \uDA02é] binary scan $y H* z list [string length $x] [string length $y] $z } {2 5 eda882c3a9} test encoding-15.12 {UtfToUtfProc high surrogate character output} { set x \uDE02Y - set y [encoding convertto utf-8 \uDE02Y] + set y [encoding convertto -nocomplain utf-8 \uDE02Y] binary scan $y H* z list [string length $x] [string length $y] $z } {2 4 edb88259} test encoding-15.13 {UtfToUtfProc low surrogate character output} { set x \uDA02Y - set y [encoding convertto utf-8 \uDA02Y] + set y [encoding convertto -nocomplain utf-8 \uDA02Y] binary scan $y H* z list [string length $x] [string length $y] $z } {2 4 eda88259} test encoding-15.14 {UtfToUtfProc high surrogate character output} { set x \uDE02 - set y [encoding convertto utf-8 \uDE02] + set y [encoding convertto -nocomplain utf-8 \uDE02] binary scan $y H* z list [string length $x] [string length $y] $z } {1 3 edb882} test encoding-15.15 {UtfToUtfProc low surrogate character output} { set x \uDA02 - set y [encoding convertto utf-8 \uDA02] + set y [encoding convertto -nocomplain utf-8 \uDA02] binary scan $y H* z list [string length $x] [string length $y] $z } {1 3 eda882} test encoding-15.16 {UtfToUtfProc: Invalid 4-byte UTF-8, see [ed29806ba]} { set x \xF0\xA0\xA1\xC2 - set y [encoding convertfrom utf-8 \xF0\xA0\xA1\xC2] + set y [encoding convertfrom -nocomplain utf-8 \xF0\xA0\xA1\xC2] list [string length $x] $y } "4 \xF0\xA0\xA1\xC2" test encoding-15.17 {UtfToUtfProc emoji character output} { @@ -487,10 +487,10 @@ test encoding-17.2 {UtfToUcs2Proc} -body { encoding convertfrom utf-16 [encoding convertto ucs-2 "\U460DC"] } -result "\uFFFD" test encoding-17.3 {UtfToUtf16Proc} -body { - encoding convertto utf-16be "\uDCDC" + encoding convertto -nocomplain utf-16be "\uDCDC" } -result "\xFF\xFD" test encoding-17.4 {UtfToUtf16Proc} -body { - encoding convertto utf-16le "\uD8D8" + encoding convertto -nocomplain utf-16le "\uD8D8" } -result "\xFD\xFF" test encoding-17.5 {UtfToUtf16Proc} -body { encoding convertto utf-32le "\U460DC" @@ -615,26 +615,62 @@ test encoding-24.4 {Parse valid or invalid utf-8} { string length [encoding convertfrom utf-8 "\xC0\x80"] } 1 test encoding-24.5 {Parse valid or invalid utf-8} { - string length [encoding convertfrom utf-8 "\xC0\x81"] + string length [encoding convertfrom -nocomplain utf-8 "\xC0\x81"] } 2 test encoding-24.6 {Parse valid or invalid utf-8} { - string length [encoding convertfrom utf-8 "\xC1\xBF"] + string length [encoding convertfrom -nocomplain utf-8 "\xC1\xBF"] } 2 test encoding-24.7 {Parse valid or invalid utf-8} { string length [encoding convertfrom utf-8 "\xC2\x80"] } 1 test encoding-24.8 {Parse valid or invalid utf-8} { - string length [encoding convertfrom utf-8 "\xE0\x80\x80"] + string length [encoding convertfrom -nocomplain utf-8 "\xE0\x80\x80"] } 3 test encoding-24.9 {Parse valid or invalid utf-8} { - string length [encoding convertfrom utf-8 "\xE0\x9F\xBF"] + string length [encoding convertfrom -nocomplain utf-8 "\xE0\x9F\xBF"] } 3 test encoding-24.10 {Parse valid or invalid utf-8} { string length [encoding convertfrom utf-8 "\xE0\xA0\x80"] } 1 test encoding-24.11 {Parse valid or invalid utf-8} { - string length [encoding convertfrom utf-8 "\xEF\xBF\xBF"] + string length [encoding convertfrom -nocomplain utf-8 "\xEF\xBF\xBF"] } 1 +test encoding-24.12 {Parse valid or invalid utf-8} -body { + encoding convertfrom utf-8 "\xC0\x81" +} -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xC0'} +test encoding-24.13 {Parse valid or invalid utf-8} -body { + encoding convertfrom utf-8 "\xC1\xBF" +} -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xC1'} +test encoding-24.14 {Parse valid or invalid utf-8} { + string length [encoding convertfrom utf-8 "\xC2\x80"] +} 1 +test encoding-24.15 {Parse valid or invalid utf-8} -body { + encoding convertfrom utf-8 "Z\xE0\x80" +} -returnCodes 1 -result {unexpected byte sequence starting at index 1: '\xE0'} +test encoding-24.16 {Parse valid or invalid utf-8} -constraints testbytestring -body { + encoding convertto utf-8 [testbytestring "Z\u4343\x80"] +} -returnCodes 1 -result {expected byte sequence but character 1 was '䍃' (U+004343)} +test encoding-24.17 {Parse valid or invalid utf-8} -constraints testbytestring -body { + encoding convertto utf-8 [testbytestring "Z\xE0\x80"] +} -result "Z\xC3\xA0\xE2\x82\xAC" +test encoding-24.18 {Parse valid or invalid utf-8} -constraints testbytestring -body { + encoding convertto utf-8 [testbytestring "Z\xE0\x80xxxxxx"] +} -result "Z\xC3\xA0\xE2\x82\xACxxxxxx" +test encoding-24.19 {Parse valid or invalid utf-8} -body { + encoding convertto utf-8 "ZX\uD800" +} -returnCodes 1 -match glob -result "unexpected character at index 2: 'U+00D800'" +test encoding-24.20 {Parse with -nocomplain but without providing encoding} { + string length [encoding convertfrom -nocomplain "\x20"] +} 1 +test encoding-24.21 {Parse with -nocomplain but without providing encoding} { + string length [encoding convertto -nocomplain "\x20"] +} 1 +test encoding-24.22 {Syntax error, two encodings} -body { + encoding convertfrom iso8859-1 utf-8 "ZX\uD800" +} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertfrom ?-nocomplain? ?encoding? data"} +test encoding-24.23 {Syntax error, two encodings} -body { + encoding convertto iso8859-1 utf-8 "ZX\uD800" +} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertto ?-nocomplain? ?encoding? data"} file delete [file join [temporaryDirectory] iso2022.txt] @@ -792,7 +828,7 @@ test encoding-28.0 {all encodings load} -body { if {$name ne "unicode"} { incr count } - encoding convertto $name $string + encoding convertto -nocomplain $name $string # discard the cached internal representation of Tcl_Encoding # Unfortunately, without this, encoding 2-1 fails. diff --git a/tests/http.test b/tests/http.test index 2fd5af4..3b2963e 100644 --- a/tests/http.test +++ b/tests/http.test @@ -664,13 +664,11 @@ test http-7.3 {http::formatQuery} -setup { test http-7.4 {http::formatQuery} -setup { set enc [http::config -urlencoding] } -body { - # this would be reverting to http <=2.4 behavior w/o errors - # (unknown chars become '?') http::config -urlencoding "iso8859-1" http::mapReply "∈" } -cleanup { http::config -urlencoding $enc -} -result {%3F} +} -errorCode {TCL ENCODING ILLEGALSEQUENCE 0} -result {unexpected character at index 0: 'U+002208'} package require tcl::idna 1.0 diff --git a/tests/io.test b/tests/io.test index 0ef3422..9b7a34a 100644 --- a/tests/io.test +++ b/tests/io.test @@ -268,7 +268,7 @@ test io-3.3 {WriteChars: compatibility with WriteBytes: flush on line} { close $f set x } "\r\n12" -test io-3.4 {WriteChars: loop over stage buffer} { +test io-3.4 {WriteChars: loop over stage buffer} -body { # stage buffer maps to more than can be queued at once. set f [open $path(test1) w] @@ -277,8 +277,8 @@ test io-3.4 {WriteChars: loop over stage buffer} { set x [list [contents $path(test1)]] close $f lappend x [contents $path(test1)] -} [list "!)!)!)!)!)!)!)!)" "!)!)!)!)!)!)!)!)!)!)!)!)!)!)!)"] -test io-3.5 {WriteChars: saved != 0} { +} -errorCode {POSIX EILSEQ {illegal byte sequence}} -match glob -result {error writing "*": illegal byte sequence} +test io-3.5 {WriteChars: saved != 0} -body { # Bytes produced by UtfToExternal from end of last channel buffer # had to be moved to beginning of next channel buffer to preserve # requested buffersize. @@ -289,7 +289,7 @@ test io-3.5 {WriteChars: saved != 0} { set x [list [contents $path(test1)]] close $f lappend x [contents $path(test1)] -} [list "!)!)!)!)!)!)!)!)!" "!)!)!)!)!)!)!)!)!)!)!)!)!)!)!)"] +} -errorCode {POSIX EILSEQ {illegal byte sequence}} -match glob -result {error writing "*": illegal byte sequence} test io-3.6 {WriteChars: (stageRead + dstWrote == 0)} { # One incomplete UTF-8 character at end of staging buffer. Backup # in src to the beginning of that UTF-8 character and try again. @@ -307,7 +307,7 @@ test io-3.6 {WriteChars: (stageRead + dstWrote == 0)} { close $f lappend x [contents $path(test1)] } [list "12345678901234\x82\x60" "12345678901234\x82\x60\x82\x61"] -test io-3.7 {WriteChars: (bufPtr->nextAdded > bufPtr->length)} { +test io-3.7 {WriteChars: (bufPtr->nextAdded > bufPtr->length)} -body { # When translating UTF-8 to external, the produced bytes went past end # of the channel buffer. This is done purpose -- we then truncate the # bytes at the end of the partial character to preserve the requested @@ -320,7 +320,7 @@ test io-3.7 {WriteChars: (bufPtr->nextAdded > bufPtr->length)} { set x [list [contents $path(test1)]] close $f lappend x [contents $path(test1)] -} [list "!)!)!)!)!)!)!)!)!" "!)!)!)!)!)!)!)!)!)!)!)!)!)!)!)"] +} -errorCode {POSIX EILSEQ {illegal byte sequence}} -match glob -result {error writing "*": illegal byte sequence} test io-3.8 {WriteChars: reset sawLF after each buffer} { set f [open $path(test1) w] fconfigure $f -encoding ascii -buffering line -translation lf \ @@ -1532,7 +1532,7 @@ test io-12.8 {ReadChars: multibyte chars split} { close $f scan [string index $in end] %c } 160 -test io-12.9 {ReadChars: multibyte chars split} { +test io-12.9 {ReadChars: multibyte chars split} knownBug { set f [open $path(test1) w] fconfigure $f -translation binary puts -nonewline $f [string repeat a 9]\xC2 @@ -1543,7 +1543,7 @@ test io-12.9 {ReadChars: multibyte chars split} { close $f scan [string index $in end] %c } 194 -test io-12.10 {ReadChars: multibyte chars split} { +test io-12.10 {ReadChars: multibyte chars split} knownBug { set f [open $path(test1) w] fconfigure $f -translation binary puts -nonewline $f [string repeat a 9]\xC2 diff --git a/tests/main.test b/tests/main.test index 2d3f63c..4aadd79 100644 --- a/tests/main.test +++ b/tests/main.test @@ -153,7 +153,7 @@ namespace eval ::tcl::test::main { puts -nonewline $f {puts [string equal \u20ac } puts $f "€]" close $f - catch {set f [open "|[list [interpreter] -encoding ascii script]" r]} + catch {set f [open "|[list [interpreter] -encoding iso8859-1 script]" r]} } -body { read $f } -cleanup { diff --git a/tests/safe.test b/tests/safe.test index 773b16f..5f3eae8 100644 --- a/tests/safe.test +++ b/tests/safe.test @@ -1269,7 +1269,7 @@ test safe-11.7 {testing safe encoding} -setup { interp eval $i encoding convertfrom } -returnCodes error -cleanup { safe::interpDelete $i -} -result {wrong # args: should be "encoding convertfrom ?encoding? data"} +} -result {wrong # args: should be "encoding convertfrom ?-nocomplain? ?encoding? data"} test safe-11.7.1 {testing safe encoding} -setup { set i [safe::interpCreate] } -body { @@ -1278,7 +1278,7 @@ test safe-11.7.1 {testing safe encoding} -setup { } -returnCodes ok -match glob -cleanup { unset -nocomplain m o safe::interpDelete $i -} -result {wrong # args: should be "encoding convertfrom ?encoding? data" +} -result {wrong # args: should be "encoding convertfrom ?-nocomplain? ?encoding? data" while executing "encoding convertfrom" invoked from within @@ -1291,7 +1291,7 @@ test safe-11.8 {testing safe encoding} -setup { interp eval $i encoding convertto } -returnCodes error -cleanup { safe::interpDelete $i -} -result {wrong # args: should be "encoding convertto ?encoding? data"} +} -result {wrong # args: should be "encoding convertto ?-nocomplain? ?encoding? data"} test safe-11.8.1 {testing safe encoding} -setup { set i [safe::interpCreate] } -body { @@ -1300,7 +1300,7 @@ test safe-11.8.1 {testing safe encoding} -setup { } -returnCodes ok -match glob -cleanup { unset -nocomplain m o safe::interpDelete $i -} -result {wrong # args: should be "encoding convertto ?encoding? data" +} -result {wrong # args: should be "encoding convertto ?-nocomplain? ?encoding? data" while executing "encoding convertto" invoked from within diff --git a/tests/source.test b/tests/source.test index c436317..98aaee2 100644 --- a/tests/source.test +++ b/tests/source.test @@ -283,11 +283,11 @@ test source-7.6 {source -encoding: mismatch encoding error} -setup { puts $f "proc € {} {return foo}" close $f } -body { - source -encoding ascii $sourcefile + source -encoding iso8859-1 $sourcefile € } -cleanup { removeFile source.file -} -returnCodes error -match glob -result {invalid command name*} +} -returnCodes error -result {invalid command name "€"} test source-8.1 {source and coroutine/yield} -setup { set sourcefile [makeFile {} source.file] diff --git a/win/rules.vc b/win/rules.vc index 713e7f9..3107756 100644 --- a/win/rules.vc +++ b/win/rules.vc @@ -816,8 +816,7 @@ DOTSEPARATED=$(DOTSEPARATED:b=.) # configuration (ignored for Tcl itself)
# _USE_64BIT_TIME_T - forces a build using 64-bit time_t for 32-bit build
# (CRT library should support this, not needed for Tcl 9.x)
-# TCL_UTF_MAX=4 - forces a build allowing 4-byte UTF-8 sequences internally.
-# (Not needed for Tcl 9.x)
+# TCL_UTF_MAX=3 - forces a build using UTF-16 internally (not recommended).
# Further, LINKERFLAGS are modified based on above.
# Default values for all the above
@@ -1423,13 +1422,13 @@ OPTDEFINES = $(OPTDEFINES) /DNO_STRTOI64=1 !if "$(_USE_64BIT_TIME_T)" == "1"
OPTDEFINES = $(OPTDEFINES) /D_USE_64BIT_TIME_T=1
!endif
-!if "$(TCL_UTF_MAX)" == "4"
-OPTDEFINES = $(OPTDEFINES) /DTCL_UTF_MAX=4
-!endif
# _ATL_XP_TARGETING - Newer SDK's need this to build for XP
COMPILERFLAGS = /D_ATL_XP_TARGETING
!endif
+!if "$(TCL_UTF_MAX)" == "3"
+OPTDEFINES = $(OPTDEFINES) /DTCL_UTF_MAX=3
+!endif
# Like the TEA system only set this non empty for non-Tk extensions
# Note: some extensions use PACKAGE_NAME and others use PACKAGE_TCLNAME
@@ -1471,8 +1470,8 @@ cdebug = $(cdebug) -Zi !endif # $(DEBUG)
-# cwarn includes default warning levels.
-cwarn = $(WARNINGS)
+# cwarn includes default warning levels, also C4146 is useless.
+cwarn = $(WARNINGS) -wd4146
!if "$(MACHINE)" == "AMD64" || "$(MACHINE)" == "ARM64"
# Disable pointer<->int warnings related to cast between different sizes
|
