diff options
-rw-r--r-- | doc/Encoding.3 | 8 | ||||
-rw-r--r-- | doc/OpenFileChnl.3 | 15 | ||||
-rw-r--r-- | doc/encoding.n | 15 | ||||
-rw-r--r-- | generic/tcl.decls | 5 | ||||
-rw-r--r-- | generic/tcl.h | 20 | ||||
-rw-r--r-- | generic/tclCmdAH.c | 2 | ||||
-rw-r--r-- | generic/tclDecls.h | 8 | ||||
-rw-r--r-- | generic/tclEncoding.c | 76 | ||||
-rw-r--r-- | generic/tclIO.c | 45 | ||||
-rw-r--r-- | generic/tclIOCmd.c | 33 | ||||
-rw-r--r-- | generic/tclInt.h | 1 | ||||
-rw-r--r-- | generic/tclStubInit.c | 2 | ||||
-rw-r--r-- | library/http/http.tcl | 12 | ||||
-rw-r--r-- | tests/chanio.test | 2 | ||||
-rw-r--r-- | tests/encoding.test | 160 | ||||
-rw-r--r-- | tests/encodingVectors.tcl | 2 | ||||
-rw-r--r-- | tests/io.test | 56 | ||||
-rw-r--r-- | tests/ioCmd.test | 8 | ||||
-rw-r--r-- | tests/utfext.test | 2 | ||||
-rw-r--r-- | tests/winConsole.test | 4 | ||||
-rw-r--r-- | tests/zlib.test | 4 |
21 files changed, 235 insertions, 245 deletions
diff --git a/doc/Encoding.3 b/doc/Encoding.3 index 942d4c1..cb69b0f 100644 --- a/doc/Encoding.3 +++ b/doc/Encoding.3 @@ -105,9 +105,9 @@ byte is converted and then to reset to an initial state. The \fBTCL_PROFILE_*\fR bits defined in the \fBPROFILES\fR section below control the encoding profile to be used for dealing with invalid data or other errors in the encoding transform. -\fBTCL_ENCODING_STOPONERROR\fR is present for backward compatibility with -Tcl 8.6 and forces the encoding profile to \fBstrict\fR. - +The flag \fBTCL_ENCODING_STOPONERROR\fR has no effect, +it only has meaning in Tcl 8.x. +.PP Some flags bits may not be usable with some functions as noted in the function descriptions below. .AP Tcl_EncodingState *statePtr in/out @@ -590,7 +590,7 @@ with at most one of \fBTCL_ENCODING_PROFILE_TCL8\fR, \fBTCL_ENCODING_PROFILE_STRICT\fR or \fBTCL_ENCODING_PROFILE_REPLACE\fR. These correspond to the \fBtcl8\fR, \fBstrict\fR and \fBreplace\fR profiles respectively. If none are specified, a version-dependent default profile is used. -For Tcl 8.7, the default profile is \fBtcl8\fR. +For Tcl 9.0, the default profile is \fBstrict\fR. .PP For details about profiles, see the \fBPROFILES\fR section in the documentation of the \fBencoding\fR command. diff --git a/doc/OpenFileChnl.3 b/doc/OpenFileChnl.3 index 1b9d5d3..cac1723 100644 --- a/doc/OpenFileChnl.3 +++ b/doc/OpenFileChnl.3 @@ -9,7 +9,7 @@ .BS '\" Note: do not modify the .SH NAME line immediately below! .SH NAME -Tcl_OpenFileChannel, Tcl_OpenCommandChannel, Tcl_MakeFileChannel, Tcl_GetChannel, Tcl_GetChannelNames, Tcl_GetChannelNamesEx, Tcl_RegisterChannel, Tcl_UnregisterChannel, Tcl_DetachChannel, Tcl_IsStandardChannel, Tcl_Close, Tcl_ReadChars, Tcl_Read, Tcl_GetsObj, Tcl_Gets, Tcl_WriteObj, Tcl_WriteChars, Tcl_Write, Tcl_Flush, Tcl_Seek, Tcl_Tell, Tcl_TruncateChannel, Tcl_GetChannelOption, Tcl_SetChannelOption, Tcl_Eof, Tcl_InputBlocked, Tcl_InputBuffered, Tcl_OutputBuffered, Tcl_Ungets, Tcl_ReadRaw, Tcl_WriteRaw \- buffered I/O facilities using channels +Tcl_OpenFileChannel, Tcl_OpenCommandChannel, Tcl_MakeFileChannel, Tcl_GetChannel, Tcl_GetChannelNames, Tcl_GetChannelNamesEx, Tcl_RegisterChannel, Tcl_UnregisterChannel, Tcl_DetachChannel, Tcl_IsStandardChannel, Tcl_Close, Tcl_ReadChars, Tcl_Read, Tcl_GetsObj, Tcl_Gets, Tcl_WriteObj, Tcl_WriteChars, Tcl_Write, Tcl_Flush, Tcl_Seek, Tcl_Tell, Tcl_TruncateChannel, Tcl_GetChannelOption, Tcl_SetChannelOption, Tcl_Eof, Tcl_InputBlocked, Tcl_InputBuffered, Tcl_InputEncodingError, Tcl_OutputBuffered, Tcl_Ungets, Tcl_ReadRaw, Tcl_WriteRaw \- buffered I/O facilities using channels .SH SYNOPSIS .nf \fB#include <tcl.h>\fR @@ -90,6 +90,9 @@ int \fBTcl_InputBuffered\fR(\fIchannel\fR) .sp int +\fBTcl_InputEncodingError\fR(\fIchannel\fR) +.sp +int \fBTcl_OutputBuffered\fR(\fIchannel\fR) .sp long long @@ -476,12 +479,20 @@ that can be retrieved with \fBTcl_GetErrno\fR. \fBTcl_GetsObj\fR also returns TCL_INDEX_NONE if the end of the file is reached; the \fBTcl_Eof\fR procedure can be used to distinguish an error from an end-of-file condition. .PP -If the channel is in nonblocking mode, the return value can also be TCL_INDEX_NONE +If the channel is in blocking mode, the return value can also be TCL_INDEX_NONE if no data was available or the data that was available did not contain an end-of-line character. When TCL_INDEX_NONE is returned, the \fBTcl_InputBlocked\fR procedure may be invoked to determine if the channel is blocked because of input unavailability. .PP +If the channel is in blocking mode, it might be that there is data available +but - at the same time - an encoding error occurred. In that case, the +POSIX error EILSEQ will be recorded, but - since \fBTcl_Gets\fR/\fBTcl_Read\fR +didn't return TCL_INDEX_NONE we cannot be sure if the POSIX error +maybe was a left-over from an earlier error. The only way to be sure +is calling the \fBTcl_InputEncodingError\fR procedure, it will +return 1 if the channel is at an encoding error position. +.PP \fBTcl_Gets\fR is the same as \fBTcl_GetsObj\fR except the resulting characters are appended to the dynamic string given by \fIlineRead\fR rather than a Tcl value. diff --git a/doc/encoding.n b/doc/encoding.n index c881d26..b216ebe 100644 --- a/doc/encoding.n +++ b/doc/encoding.n @@ -110,9 +110,14 @@ encoding. The following profiles are currently implemented. .VS "TCL8.7 TIP656" .TP +\fBstrict\fR +. +The default profile. The operation fails when invalid data for the encoding +are encountered. +.TP \fBtcl8\fR . -The default profile. Provides for behaviour identical to that of Tcl 8.6: When +Provides for behaviour identical to that of Tcl 8.6: When decoding, for encodings \fBother than utf-8\fR, each invalid byte is interpreted as the Unicode value given by that one byte. For example, the byte 0x80, which is invalid in the ASCII encoding would be mapped to the Unicode value U+0080. @@ -126,10 +131,6 @@ an additional special case, the sequence 0xC0 0x80 is mapped to U+0000. When encoding, each character that cannot be represented in the encoding is replaced by an encoding-dependent character, usually the question mark \fB?\fR. .TP -\fBstrict\fR -. -The operation fails when invalid data for the encoding are encountered. -.TP \fBreplace\fR . When decoding, invalid bytes are replaced by U+FFFD, the Unicode REPLACEMENT @@ -179,7 +180,7 @@ unexpected byte sequence starting at index 1: '\ex80' Example 3: Get partial data and the error location: .PP .CS -% codepoints [encoding convertfrom -profile strict -failindex idx ascii AB\ex80] +% codepoints [encoding convertfrom -failindex idx ascii AB\ex80] U+000041 U+000042 % set idx 2 @@ -192,7 +193,7 @@ Example 4: Encode a character that is not representable in ISO8859-1: A? % encoding convertto -profile strict iso8859-1 A\eu0141 unexpected character at index 1: 'U+000141' -% encoding convertto -profile strict -failindex idx iso8859-1 A\eu0141 +% encoding convertto -failindex idx iso8859-1 A\eu0141 A % set idx 1 diff --git a/generic/tcl.decls b/generic/tcl.decls index e50e3de..e050aa4 100644 --- a/generic/tcl.decls +++ b/generic/tcl.decls @@ -2626,6 +2626,11 @@ declare 685 { Tcl_Obj *Tcl_DStringToObj(Tcl_DString *dsPtr) } +# TIP 657 +declare 686 { + int Tcl_InputEncodingError(Tcl_Channel chan) +} + # ----- BASELINE -- FOR -- 8.7.0 / 9.0.0 ----- # declare 687 { diff --git a/generic/tcl.h b/generic/tcl.h index 946d7b9..caa33b4 100644 --- a/generic/tcl.h +++ b/generic/tcl.h @@ -1927,8 +1927,6 @@ typedef struct Tcl_EncodingType { * reset to an initial state. If the source * buffer contains the entire input stream to be * converted, this flag should be set. - * TCL_ENCODING_STRICT - Be more strict in accepting what - * is considered a 'invalid byte sequence'. * TCL_ENCODING_STOPONERROR - Not used any more. * TCL_ENCODING_NO_TERMINATE - If set, Tcl_ExternalToUtf does not append a * terminating NUL byte. Since it does not need @@ -1964,15 +1962,10 @@ typedef struct Tcl_EncodingType { /* Internal use bits, do not define bits in this space. See above comment */ #define TCL_ENCODING_INTERNAL_USE_MASK 0xFF00 /* Reserve top byte for profile values (disjoint, not a mask) */ +#define TCL_ENCODING_PROFILE_STRICT TCL_ENCODING_STOPONERROR #define TCL_ENCODING_PROFILE_TCL8 0x01000000 -#define TCL_ENCODING_PROFILE_STRICT 0x02000000 -#define TCL_ENCODING_PROFILE_REPLACE 0x03000000 -/* Still being argued - For Tcl9, is the default strict? TODO */ -#if TCL_MAJOR_VERSION < 9 -#define TCL_ENCODING_PROFILE_DEFAULT TCL_ENCODING_PROFILE_TCL8 -#else -#define TCL_ENCODING_PROFILE_DEFAULT TCL_ENCODING_PROFILE_TCL8 /* STRICT? REPLACE? TODO */ -#endif +#define TCL_ENCODING_PROFILE_REPLACE 0x02000000 +#define TCL_ENCODING_PROFILE_DEFAULT 0 /* * The following definitions are the error codes returned by the conversion @@ -1993,13 +1986,10 @@ typedef struct Tcl_EncodingType { * TCL_CONVERT_SYNTAX - The source stream contained an invalid * character sequence. This may occur if the * input stream has been damaged or if the input - * encoding method was misidentified. This error - * is reported unless if TCL_ENCODING_NOCOMPLAIN - * was specified. + * encoding method was misidentified. * TCL_CONVERT_UNKNOWN - The source string contained a character that * could not be represented in the target - * encoding. This error is reported unless if - * TCL_ENCODING_NOCOMPLAIN was specified. + * encoding. */ #define TCL_CONVERT_MULTIBYTE (-1) diff --git a/generic/tclCmdAH.c b/generic/tclCmdAH.c index 5dbadb8..8a5c9ce 100644 --- a/generic/tclCmdAH.c +++ b/generic/tclCmdAH.c @@ -436,7 +436,7 @@ EncodingConvertParseOptions ( Tcl_Obj *dataObj; Tcl_Obj *failVarObj; #if TCL_MAJOR_VERSION > 8 || defined(TCL_NO_DEPRECATED) - int profile = TCL_ENCODING_PROFILE_TCL8; /* TODO - default for Tcl9? */ + int profile = TCL_ENCODING_PROFILE_STRICT; #else int profile = TCL_ENCODING_PROFILE_TCL8; #endif diff --git a/generic/tclDecls.h b/generic/tclDecls.h index ec9a49a..99661f4 100644 --- a/generic/tclDecls.h +++ b/generic/tclDecls.h @@ -1861,7 +1861,8 @@ EXTERN int Tcl_GetWideUIntFromObj(Tcl_Interp *interp, Tcl_Obj *objPtr, Tcl_WideUInt *uwidePtr); /* 685 */ EXTERN Tcl_Obj * Tcl_DStringToObj(Tcl_DString *dsPtr); -/* Slot 686 is reserved */ +/* 686 */ +EXTERN int Tcl_InputEncodingError(Tcl_Channel chan); /* 687 */ EXTERN void TclUnusedStubEntry(void); @@ -2561,7 +2562,7 @@ typedef struct TclStubs { Tcl_Size (*tcl_GetEncodingNulLength) (Tcl_Encoding encoding); /* 683 */ int (*tcl_GetWideUIntFromObj) (Tcl_Interp *interp, Tcl_Obj *objPtr, Tcl_WideUInt *uwidePtr); /* 684 */ Tcl_Obj * (*tcl_DStringToObj) (Tcl_DString *dsPtr); /* 685 */ - void (*reserved686)(void); + int (*tcl_InputEncodingError) (Tcl_Channel chan); /* 686 */ void (*tclUnusedStubEntry) (void); /* 687 */ } TclStubs; @@ -3887,7 +3888,8 @@ extern const TclStubs *tclStubsPtr; (tclStubsPtr->tcl_GetWideUIntFromObj) /* 684 */ #define Tcl_DStringToObj \ (tclStubsPtr->tcl_DStringToObj) /* 685 */ -/* Slot 686 is reserved */ +#define Tcl_InputEncodingError \ + (tclStubsPtr->tcl_InputEncodingError) /* 686 */ #define TclUnusedStubEntry \ (tclStubsPtr->tclUnusedStubEntry) /* 687 */ diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index a87eb7f..5c240e3 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -199,18 +199,11 @@ static struct TclEncodingProfiles { {"strict", TCL_ENCODING_PROFILE_STRICT}, {"tcl8", TCL_ENCODING_PROFILE_TCL8}, }; -#define PROFILE_TCL8(flags_) \ - ((CHANNEL_PROFILE_GET(flags_) == TCL_ENCODING_PROFILE_TCL8) \ - || (CHANNEL_PROFILE_GET(flags_) == 0 \ - && TCL_ENCODING_PROFILE_DEFAULT == TCL_ENCODING_PROFILE_TCL8)) #define PROFILE_STRICT(flags_) \ - ((CHANNEL_PROFILE_GET(flags_) == TCL_ENCODING_PROFILE_STRICT) \ - || (CHANNEL_PROFILE_GET(flags_) == 0 \ - && TCL_ENCODING_PROFILE_DEFAULT == TCL_ENCODING_PROFILE_STRICT)) + (CHANNEL_PROFILE_GET(flags_) == TCL_ENCODING_PROFILE_STRICT) + #define PROFILE_REPLACE(flags_) \ - ((CHANNEL_PROFILE_GET(flags_) == TCL_ENCODING_PROFILE_REPLACE) \ - || (CHANNEL_PROFILE_GET(flags_) == 0 \ - && TCL_ENCODING_PROFILE_DEFAULT == TCL_ENCODING_PROFILE_REPLACE)) + (CHANNEL_PROFILE_GET(flags_) == TCL_ENCODING_PROFILE_REPLACE) #define UNICODE_REPLACE_CHAR ((Tcl_UniChar)0xFFFD) #define SURROGATE(c_) (((c_) & ~0x7FF) == 0xD800) @@ -1171,10 +1164,6 @@ Tcl_ExternalToUtfDString( * Possible flags values: * target encoding. It should be composed by OR-ing the following: * - *At most one* of TCL_ENCODING_PROFILE{DEFAULT,TCL8,STRICT} - * - TCL_ENCODING_STOPONERROR: Backward compatibility. Sets the profile - * to TCL_ENCODING_PROFILE_STRICT overriding any specified profile flags - * Any other flag bits will cause an error to be returned (for future - * compatibility) * * Results: * The return value is one of @@ -1501,8 +1490,6 @@ Tcl_UtfToExternalDString( * the source buffer are invalid or cannot be represented in the * target encoding. It should be composed by OR-ing the following: * - *At most one* of TCL_ENCODING_PROFILE{DEFAULT,TCL8,STRICT} - * - TCL_ENCODING_STOPONERROR: Backward compatibility. Sets the profile - * to TCL_ENCODING_PROFILE_STRICT overriding any specified profile flags * * Results: * The return value is one of @@ -2452,7 +2439,6 @@ BinaryProc( if (dstLen < 0) { dstLen = 0; } - flags = TclEncodingSetProfileFlags(flags); if ((flags & TCL_ENCODING_CHAR_LIMIT) && srcLen > *dstCharsPtr) { srcLen = *dstCharsPtr; } @@ -2520,7 +2506,6 @@ UtfToUtfProc( srcStart = src; srcEnd = src + srcLen; srcClose = srcEnd; - flags = TclEncodingSetProfileFlags(flags); if ((flags & TCL_ENCODING_END) == 0) { srcClose -= 6; } @@ -2740,7 +2725,6 @@ Utf32ToUtfProc( int result, numChars, charLimit = INT_MAX; int ch = 0, bytesLeft = srcLen % 4; - flags = TclEncodingSetProfileFlags(flags); flags |= PTR2INT(clientData); if (flags & TCL_ENCODING_CHAR_LIMIT) { charLimit = *dstCharsPtr; @@ -2906,7 +2890,6 @@ UtfToUtf32Proc( srcStart = src; srcEnd = src + srcLen; srcClose = srcEnd; - flags = TclEncodingSetProfileFlags(flags); if ((flags & TCL_ENCODING_END) == 0) { srcClose -= TCL_UTF_MAX; } @@ -3004,7 +2987,6 @@ Utf16ToUtfProc( int result, numChars, charLimit = INT_MAX; unsigned short ch = 0; - flags = TclEncodingSetProfileFlags(flags); flags |= PTR2INT(clientData); if (flags & TCL_ENCODING_CHAR_LIMIT) { charLimit = *dstCharsPtr; @@ -3084,7 +3066,7 @@ Utf16ToUtfProc( *dst++ = (ch & 0xFF); } else if (HIGH_SURROGATE(prev) || HIGH_SURROGATE(ch)) { dst += Tcl_UniCharToUtf(ch | TCL_COMBINE, dst); - } else if (LOW_SURROGATE(ch) && !PROFILE_TCL8(flags)) { + } else if (LOW_SURROGATE(ch) && (PROFILE_STRICT(flags) || PROFILE_REPLACE(flags))) { /* Lo surrogate not preceded by Hi surrogate and not tcl8 profile */ if (PROFILE_STRICT(flags)) { result = TCL_CONVERT_UNKNOWN; @@ -3185,7 +3167,6 @@ UtfToUtf16Proc( srcStart = src; srcEnd = src + srcLen; srcClose = srcEnd; - flags = TclEncodingSetProfileFlags(flags); if ((flags & TCL_ENCODING_END) == 0) { srcClose -= TCL_UTF_MAX; } @@ -3291,7 +3272,6 @@ UtfToUcs2Proc( int result, numChars, len; Tcl_UniChar ch = 0; - flags = TclEncodingSetProfileFlags(flags); flags |= PTR2INT(clientData); srcStart = src; srcEnd = src + srcLen; @@ -3414,7 +3394,6 @@ TableToUtfProc( const unsigned short *pageZero; TableEncodingData *dataPtr = (TableEncodingData *)clientData; - flags = TclEncodingSetProfileFlags(flags); if (flags & TCL_ENCODING_CHAR_LIMIT) { charLimit = *dstCharsPtr; } @@ -3548,7 +3527,6 @@ TableFromUtfProc( srcStart = src; srcEnd = src + srcLen; srcClose = srcEnd; - flags = TclEncodingSetProfileFlags(flags); if ((flags & TCL_ENCODING_END) == 0) { srcClose -= TCL_UTF_MAX; } @@ -3655,7 +3633,6 @@ Iso88591ToUtfProc( const char *dstEnd, *dstStart; int result, numChars, charLimit = INT_MAX; - flags = TclEncodingSetProfileFlags(flags); if (flags & TCL_ENCODING_CHAR_LIMIT) { charLimit = *dstCharsPtr; } @@ -3740,7 +3717,6 @@ Iso88591FromUtfProc( srcStart = src; srcEnd = src + srcLen; srcClose = srcEnd; - flags = TclEncodingSetProfileFlags(flags); if ((flags & TCL_ENCODING_END) == 0) { srcClose -= TCL_UTF_MAX; } @@ -3888,7 +3864,6 @@ EscapeToUtfProc( int state, result, numChars, charLimit = INT_MAX; const char *dstStart, *dstEnd; - flags = TclEncodingSetProfileFlags(flags); if (flags & TCL_ENCODING_CHAR_LIMIT) { charLimit = *dstCharsPtr; } @@ -4112,7 +4087,6 @@ EscapeFromUtfProc( srcStart = src; srcEnd = src + srcLen; srcClose = srcEnd; - flags = TclEncodingSetProfileFlags(flags); if ((flags & TCL_ENCODING_END) == 0) { srcClose -= TCL_UTF_MAX; } @@ -4561,48 +4535,6 @@ TclEncodingProfileIdToName( /* *------------------------------------------------------------------------ * - * TclEncodingSetProfileFlags -- - * - * Maps the flags supported in the encoding C API's to internal flags. - * - * For backward compatibility reasons, TCL_ENCODING_STOPONERROR is - * is mapped to the TCL_ENCODING_PROFILE_STRICT overwriting any profile - * specified. - * - * If no profile or an invalid profile is specified, it is set to - * the default. - * - * Results: - * Internal encoding flag mask. - * - * Side effects: - * None. - * - *------------------------------------------------------------------------ - */ -int TclEncodingSetProfileFlags(int flags) -{ - if (flags & TCL_ENCODING_STOPONERROR) { - CHANNEL_PROFILE_SET(flags, TCL_ENCODING_PROFILE_STRICT); - } else { - int profile = CHANNEL_PROFILE_GET(flags); - switch (profile) { - case TCL_ENCODING_PROFILE_TCL8: - case TCL_ENCODING_PROFILE_STRICT: - case TCL_ENCODING_PROFILE_REPLACE: - break; - case 0: /* Unspecified by caller */ - default: - CHANNEL_PROFILE_SET(flags, TCL_ENCODING_PROFILE_DEFAULT); - break; - } - } - return flags; -} - -/* - *------------------------------------------------------------------------ - * * TclGetEncodingProfiles -- * * Get the list of supported encoding profiles. diff --git a/generic/tclIO.c b/generic/tclIO.c index 5414e73..7848fa4 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -1677,12 +1677,10 @@ Tcl_CreateChannel( } statePtr->inputEncodingState = NULL; statePtr->inputEncodingFlags = TCL_ENCODING_START; - CHANNEL_PROFILE_SET(statePtr->inputEncodingFlags, - TCL_ENCODING_PROFILE_DEFAULT); + CHANNEL_PROFILE_SET(statePtr->inputEncodingFlags, 0); statePtr->outputEncodingState = NULL; statePtr->outputEncodingFlags = TCL_ENCODING_START; - CHANNEL_PROFILE_SET(statePtr->outputEncodingFlags, - TCL_ENCODING_PROFILE_DEFAULT); + CHANNEL_PROFILE_SET(statePtr->outputEncodingFlags, 0); /* * Set the channel up initially in AUTO input translation mode to accept @@ -5012,7 +5010,9 @@ Tcl_GetsObj( if (GotFlag(statePtr, CHANNEL_ENCODING_ERROR) && (copiedTotal == 0 || !GotFlag(statePtr, CHANNEL_NONBLOCKING))) { Tcl_SetErrno(EILSEQ); - copiedTotal = -1; + if (copiedTotal == 0) { + copiedTotal = -1; + } } return copiedTotal; } @@ -6133,7 +6133,9 @@ finish: if (GotFlag(statePtr, CHANNEL_ENCODING_ERROR) && (!copied || !GotFlag(statePtr, CHANNEL_NONBLOCKING))) { Tcl_SetErrno(EILSEQ); - copied = -1; + if (!copied) { + copied = -1; + } } TclChannelRelease((Tcl_Channel)chanPtr); return copied; @@ -7620,6 +7622,32 @@ Tcl_InputBuffered( return bytesBuffered; } + +/* + *---------------------------------------------------------------------- + * + * Tcl_InputEncodingError -- + * + * Returns 1 if input is in an encoding error position, 0 otherwise. + * + * Results: + * 0 or 1, always. + * + * Side effects: + * None. + * + *---------------------------------------------------------------------- + */ + +int +Tcl_InputEncodingError( + Tcl_Channel chan) /* Is this channel blocked? */ +{ + ChannelState *statePtr = ((Channel *) chan)->state; + /* State of real channel structure. */ + + return GotFlag(statePtr, CHANNEL_ENCODING_ERROR) ? 1 : 0; +} /* *---------------------------------------------------------------------- @@ -10011,7 +10039,8 @@ CopyData( * - EOF is reached on the channel; or * - the channel is non-blocking, and we've read all we can * without blocking. - * - a channel reading error occurs (and we return TCL_INDEX_NONE) + * - a channel reading error occurs (and we return TCL_INDEX_NONE + * or - in case of encoding error - the data so far) * * Side effects: * May cause input to be buffered. @@ -10237,7 +10266,7 @@ DoRead( == (CHANNEL_EOF|CHANNEL_BLOCKED))); UpdateInterest(chanPtr); TclChannelRelease((Tcl_Channel)chanPtr); - return (int)(p - dst); + return (Tcl_Size)(p - dst); } /* diff --git a/generic/tclIOCmd.c b/generic/tclIOCmd.c index cdb8083..32edc45 100644 --- a/generic/tclIOCmd.c +++ b/generic/tclIOCmd.c @@ -316,14 +316,22 @@ Tcl_GetsObjCmd( */ if (!TclChanCaughtErrorBypass(interp, chan)) { - Tcl_SetObjResult(interp, Tcl_ObjPrintf( - "error reading \"%s\": %s", - TclGetString(chanObjPtr), Tcl_PosixError(interp))); + goto getsError; } code = TCL_ERROR; goto done; } lineLen = TCL_IO_FAILURE; + } else if (Tcl_InputEncodingError(chan)) { + Tcl_Obj *returnOpts = Tcl_NewDictObj(); + Tcl_DictObjPut(NULL, returnOpts, Tcl_NewStringObj("-data", TCL_INDEX_NONE), linePtr); + Tcl_SetReturnOptions(interp, returnOpts); + getsError: + Tcl_SetObjResult(interp, Tcl_ObjPrintf( + "error reading \"%s\": %s", + TclGetString(chanObjPtr), Tcl_PosixError(interp))); + code = TCL_ERROR; + goto done; } if (objc == 3) { if (Tcl_ObjSetVar2(interp, objv[2], NULL, linePtr, @@ -441,13 +449,24 @@ Tcl_ReadObjCmd( * regular message if nothing was found in the bypass. */ + Tcl_DecrRefCount(resultPtr); if (!TclChanCaughtErrorBypass(interp, chan)) { - Tcl_SetObjResult(interp, Tcl_ObjPrintf( - "error reading \"%s\": %s", - TclGetString(chanObjPtr), Tcl_PosixError(interp))); + Tcl_SetObjResult(interp, Tcl_ObjPrintf( + "error reading \"%s\": %s", + TclGetString(chanObjPtr), Tcl_PosixError(interp))); + goto readError; } TclChannelRelease(chan); - Tcl_DecrRefCount(resultPtr); + return TCL_ERROR; + } else if (Tcl_InputEncodingError(chan)) { + Tcl_Obj *returnOpts = Tcl_NewDictObj(); + Tcl_DictObjPut(NULL, returnOpts, Tcl_NewStringObj("-data", TCL_INDEX_NONE), resultPtr); + Tcl_SetObjResult(interp, Tcl_ObjPrintf( + "error reading \"%s\": %s", + TclGetString(chanObjPtr), Tcl_PosixError(interp))); + Tcl_SetReturnOptions(interp, returnOpts); + readError: + TclChannelRelease(chan); return TCL_ERROR; } diff --git a/generic/tclInt.h b/generic/tclInt.h index 50d992c..67424ad 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -2886,7 +2886,6 @@ TclEncodingProfileNameToId(Tcl_Interp *interp, int *profilePtr); MODULE_SCOPE const char *TclEncodingProfileIdToName(Tcl_Interp *interp, int profileId); -MODULE_SCOPE int TclEncodingSetProfileFlags(int flags); MODULE_SCOPE void TclGetEncodingProfiles(Tcl_Interp *interp); /* diff --git a/generic/tclStubInit.c b/generic/tclStubInit.c index dbd8b52..05f0ac7 100644 --- a/generic/tclStubInit.c +++ b/generic/tclStubInit.c @@ -1492,7 +1492,7 @@ const TclStubs tclStubs = { Tcl_GetEncodingNulLength, /* 683 */ Tcl_GetWideUIntFromObj, /* 684 */ Tcl_DStringToObj, /* 685 */ - 0, /* 686 */ + Tcl_InputEncodingError, /* 686 */ TclUnusedStubEntry, /* 687 */ }; diff --git a/library/http/http.tcl b/library/http/http.tcl index f2ad0a0..89334ca 100644 --- a/library/http/http.tcl +++ b/library/http/http.tcl @@ -1747,7 +1747,7 @@ proc http::OpenSocket {token DoLater} { fconfigure $sock -translation {auto crlf} \ -buffersize $state(-blocksize) if {[package vsatisfies [package provide Tcl] 9.0-]} { - fconfigure $sock -profile tcl8 + fconfigure $sock -profile replace \ } ##Log socket opened, DONE fconfigure - token $token } @@ -2168,7 +2168,7 @@ proc http::Connected {token proto phost srvurl} { fconfigure $sock -translation [list $trRead crlf] \ -buffersize $state(-blocksize) if {[package vsatisfies [package provide Tcl] 9.0-]} { - fconfigure $sock -profile tcl8 + fconfigure $sock -profile replace \ } # The following is disallowed in safe interpreters, but the socket is @@ -2561,7 +2561,7 @@ proc http::ReceiveResponse {token} { fconfigure $sock -translation [list auto $trWrite] \ -buffersize $state(-blocksize) if {[package vsatisfies [package provide Tcl] 9.0-]} { - fconfigure $sock -profile tcl8 + fconfigure $sock -profile replace \ } Log ^D$tk begin receiving response - token $token @@ -4555,7 +4555,7 @@ proc http::Eot {token {reason {}}} { set enc [CharsetToEncoding $state(charset)] if {$enc ne "binary"} { if {[package vsatisfies [package provide Tcl] 9.0-]} { - set state(body) [encoding convertfrom -profile tcl8 $enc $state(body)] + set state(body) [encoding convertfrom -profile replace $enc $state(body)] } else { set state(body) [encoding convertfrom $enc $state(body)] } @@ -4642,7 +4642,7 @@ proc http::GuessType {token} { return 0 } if {[package vsatisfies [package provide Tcl] 9.0-]} { - set state(body) [encoding convertfrom -profile tcl8 $enc $state(body)] + set state(body) [encoding convertfrom -profile replace $enc $state(body)] } else { set state(body) [encoding convertfrom $enc $state(body)] } @@ -4727,7 +4727,7 @@ proc http::quoteString {string} { # than [regsub]/[subst]). [Bug 1020491] if {[package vsatisfies [package provide Tcl] 9.0-]} { - set string [encoding convertto -profile tcl8 $http(-urlencoding) $string] + set string [encoding convertto -profile replace $http(-urlencoding) $string] } else { set string [encoding convertto $http(-urlencoding) $string] } diff --git a/tests/chanio.test b/tests/chanio.test index 09e71ca..d2d96d1 100644 --- a/tests/chanio.test +++ b/tests/chanio.test @@ -1098,7 +1098,7 @@ test chan-io-7.3 {FilterInputBytes: split up character at EOF} -setup { chan puts -nonewline $f "1234567890123\x82\x4F\x82\x50\x82" chan close $f set f [open $path(test1)] - chan configure $f -encoding shiftjis + chan configure $f -encoding shiftjis -profile tcl8 lappend x [chan gets $f line] $line lappend x [chan tell $f] [testchannel inputbuffered $f] [chan eof $f] lappend x [chan gets $f line] $line diff --git a/tests/encoding.test b/tests/encoding.test index 35340a6..0d8fdfe 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -465,22 +465,22 @@ test encoding-15.25 {UtfToUtfProc CESU-8} { encoding convertfrom cesu-8 \x00 } \x00 test encoding-15.26 {UtfToUtfProc CESU-8} { - encoding convertfrom cesu-8 \xC0\x80 + encoding convertfrom -profile tcl8 cesu-8 \xC0\x80 } \x00 -test encoding-15.27 {UtfToUtfProc -profile strict CESU-8} { - encoding convertfrom -profile strict cesu-8 \x00 +test encoding-15.27 {UtfToUtfProc CESU-8} { + encoding convertfrom cesu-8 \x00 } \x00 -test encoding-15.28 {UtfToUtfProc -profile strict CESU-8} -body { - encoding convertfrom -profile strict cesu-8 \xC0\x80 +test encoding-15.28 {UtfToUtfProc CESU-8} -body { + encoding convertfrom cesu-8 \xC0\x80 } -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xC0'} test encoding-15.29 {UtfToUtfProc CESU-8} { encoding convertto cesu-8 \x00 } \x00 -test encoding-15.30 {UtfToUtfProc -profile strict CESU-8} { - encoding convertto -profile strict cesu-8 \x00 +test encoding-15.30 {UtfToUtfProc CESU-8} { + encoding convertto cesu-8 \x00 } \x00 -test encoding-15.31 {UtfToUtfProc -profile strict CESU-8 (bytes F0-F4 are invalid)} -body { - encoding convertfrom -profile strict cesu-8 \xF1\x86\x83\x9C +test encoding-15.31 {UtfToUtfProc CESU-8 (bytes F0-F4 are invalid)} -body { + encoding convertfrom cesu-8 \xF1\x86\x83\x9C } -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xF1'} test encoding-16.1 {Utf16ToUtfProc} -body { @@ -492,7 +492,7 @@ test encoding-16.2 {Utf16ToUtfProc} -body { list $val [format %x [scan $val %c]] } -result "\U460DC 460dc" test encoding-16.3 {Utf16ToUtfProc} -body { - set val [encoding convertfrom utf-16 "\xDC\xDC"] + set val [encoding convertfrom -profile tcl8 utf-16 "\xDC\xDC"] list $val [format %x [scan $val %c]] } -result "\uDCDC dcdc" test encoding-16.4 {Ucs2ToUtfProc} -body { @@ -516,31 +516,31 @@ test encoding-16.8 {Utf32ToUtfProc} -body { list $val [format %x [scan $val %c]] } -result "\uFFFD fffd" test encoding-16.9 {Utf32ToUtfProc} -constraints utf32 -body { - encoding convertfrom utf-32le \x00\xD8\x00\x00 + encoding convertfrom -profile tcl8 utf-32le \x00\xD8\x00\x00 } -result \uD800 test encoding-16.10 {Utf32ToUtfProc} -body { - encoding convertfrom utf-32le \x00\xDC\x00\x00 + encoding convertfrom -profile tcl8 utf-32le \x00\xDC\x00\x00 } -result \uDC00 test encoding-16.11 {Utf32ToUtfProc} -body { - encoding convertfrom utf-32le \x00\xD8\x00\x00\x00\xDC\x00\x00 + encoding convertfrom -profile tcl8 utf-32le \x00\xD8\x00\x00\x00\xDC\x00\x00 } -result \uD800\uDC00 test encoding-16.12 {Utf32ToUtfProc} -constraints utf32 -body { - encoding convertfrom utf-32le \x00\xDC\x00\x00\x00\xD8\x00\x00 + encoding convertfrom -profile tcl8 utf-32le \x00\xDC\x00\x00\x00\xD8\x00\x00 } -result \uDC00\uD800 test encoding-16.13 {Utf16ToUtfProc} -body { - encoding convertfrom utf-16le \x00\xD8 + encoding convertfrom -profile tcl8 utf-16le \x00\xD8 } -result \uD800 test encoding-16.14 {Utf16ToUtfProc} -body { - encoding convertfrom utf-16le \x00\xDC + encoding convertfrom -profile tcl8 utf-16le \x00\xDC } -result \uDC00 test encoding-16.15 {Utf16ToUtfProc} -body { encoding convertfrom utf-16le \x00\xD8\x00\xDC } -result \U010000 test encoding-16.16 {Utf16ToUtfProc} -body { - encoding convertfrom utf-16le \x00\xDC\x00\xD8 + encoding convertfrom -profile tcl8 utf-16le \x00\xDC\x00\xD8 } -result \uDC00\uD800 test encoding-16.17 {Utf32ToUtfProc} -body { - list [encoding convertfrom -profile strict -failindex idx utf-32le \x41\x00\x00\x00\x00\xD8\x00\x00\x42\x00\x00\x00] [set idx] + list [encoding convertfrom -failindex idx utf-32le \x41\x00\x00\x00\x00\xD8\x00\x00\x42\x00\x00\x00] [set idx] } -result {A 4} test encoding-16.18 { @@ -563,25 +563,25 @@ test encoding-16.18 { } [namespace current]] } -result done test encoding-16.19 {Utf16ToUtfProc, bug [d19fe0a5b]} -body { - encoding convertfrom utf-16 "\x41\x41\x41" + encoding convertfrom -profile tcl8 utf-16 "\x41\x41\x41" } -result \u4141\uFFFD -test encoding-16.20 {Utf16ToUtfProc, bug [d19fe0a5b]} -constraints deprecated -body { - encoding convertfrom utf-16 "\xD8\xD8" +test encoding-16.20 {UnicodeToUtfProc, bug [d19fe0a5b]} -body { + encoding convertfrom -profile tcl8 utf-16 "\xD8\xD8" } -result \uD8D8 test encoding-16.21 {Utf16ToUtfProc, bug [d19fe0a5b]} -body { - encoding convertfrom utf-32 "\x00\x00\x00\x00\x41\x41" + encoding convertfrom -profile tcl8 utf-32 "\x00\x00\x00\x00\x41\x41" } -result \x00\uFFFD test encoding-16.22 {Utf16ToUtfProc, strict, bug [db7a085bd9]} -body { - encoding convertfrom -profile strict utf-16le \x00\xD8 + encoding convertfrom utf-16le \x00\xD8 } -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\x00'} test encoding-16.23 {Utf16ToUtfProc, strict, bug [db7a085bd9]} -body { - encoding convertfrom -profile strict utf-16le \x00\xDC + encoding convertfrom utf-16le \x00\xDC } -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\x00'} test encoding-16.24 {Utf32ToUtfProc} -body { - encoding convertfrom utf-32 "\xFF\xFF\xFF\xFF" + encoding convertfrom -profile tcl8 utf-32 "\xFF\xFF\xFF\xFF" } -result \uFFFD test encoding-16.25 {Utf32ToUtfProc} -body { - encoding convertfrom utf-32 "\x01\x00\x00\x01" + encoding convertfrom -profile tcl8 utf-32 "\x01\x00\x00\x01" } -result \uFFFD test encoding-17.1 {UtfToUtf16Proc} -body { @@ -603,60 +603,60 @@ test encoding-17.6 {UtfToUtf16Proc} -body { encoding convertto utf-32be "\U460DC" } -result "\x00\x04\x60\xDC" test encoding-17.7 {UtfToUtf16Proc} -body { - encoding convertto -profile strict utf-16be "\uDCDC" + encoding convertto utf-16be "\uDCDC" } -returnCodes error -result {unexpected character at index 0: 'U+00DCDC'} test encoding-17.8 {UtfToUtf16Proc} -body { - encoding convertto -profile strict utf-16le "\uD8D8" + encoding convertto utf-16le "\uD8D8" } -returnCodes error -result {unexpected character at index 0: 'U+00D8D8'} test encoding-17.9 {Utf32ToUtfProc} -body { - encoding convertfrom -profile strict utf-32 "\xFF\xFF\xFF\xFF" + encoding convertfrom utf-32 "\xFF\xFF\xFF\xFF" } -returnCodes error -result {unexpected byte sequence starting at index 0: '\xFF'} test encoding-17.10 {Utf32ToUtfProc} -body { encoding convertfrom -profile tcl8 utf-32 "\xFF\xFF\xFF\xFF" } -result \uFFFD test encoding-17.11 {Utf32ToUtfProc} -body { - encoding convertfrom -profile strict utf-32le "\x00\xD8\x00\x00" + encoding convertfrom utf-32le "\x00\xD8\x00\x00" } -returnCodes error -result {unexpected byte sequence starting at index 0: '\x00'} test encoding-17.12 {Utf32ToUtfProc} -body { - encoding convertfrom -profile strict utf-32le "\x00\xDC\x00\x00" + encoding convertfrom utf-32le "\x00\xDC\x00\x00" } -returnCodes error -result {unexpected byte sequence starting at index 0: '\x00'} test encoding-18.1 {TableToUtfProc on invalid input} -body { list [catch {encoding convertto jis0208 \\} res] $res -} -result {0 !)} -test encoding-18.2 {TableToUtfProc on invalid input with -profile strict} -body { - list [catch {encoding convertto -profile strict jis0208 \\} res] $res } -result {1 {unexpected character at index 0: 'U+00005C'}} -test encoding-18.3 {TableToUtfProc on invalid input with -profile strict -failindex} -body { - list [catch {encoding convertto -profile strict -failindex pos jis0208 \\} res] $res $pos +test encoding-18.2 {TableToUtfProc on invalid input with} -body { + list [catch {encoding convertto jis0208 \\} res] $res +} -result {1 {unexpected character at index 0: 'U+00005C'}} +test encoding-18.3 {TableToUtfProc on invalid input with -failindex} -body { + list [catch {encoding convertto -failindex pos jis0208 \\} res] $res $pos } -result {0 {} 0} -test encoding-18.4 {TableToUtfProc on invalid input with -failindex -profile strict} -body { - list [catch {encoding convertto -failindex pos -profile strict jis0208 \\} res] $res $pos +test encoding-18.4 {TableToUtfProc on invalid input with -failindex} -body { + list [catch {encoding convertto -failindex pos jis0208 \\} res] $res $pos } -result {0 {} 0} test encoding-18.5 {TableToUtfProc on invalid input with -failindex} -body { list [catch {encoding convertto -failindex pos jis0208 \\} res] $res $pos -} -result {0 !) -1} +} -result {0 {} 0} test encoding-18.6 {TableToUtfProc on invalid input with -profile tcl8} -body { list [catch {encoding convertto -profile tcl8 jis0208 \\} res] $res } -result {0 !)} test encoding-19.1 {TableFromUtfProc} -body { - encoding convertfrom ascii AÁ + encoding convertfrom -profile tcl8 ascii AÁ } -result AÁ test encoding-19.2 {TableFromUtfProc} -body { encoding convertfrom -profile tcl8 ascii AÁ } -result AÁ test encoding-19.3 {TableFromUtfProc} -body { - encoding convertfrom -profile strict ascii AÁ + encoding convertfrom ascii AÁ } -returnCodes 1 -result {unexpected byte sequence starting at index 1: '\xC1'} test encoding-19.4 {TableFromUtfProc} -body { list [encoding convertfrom -failindex idx ascii AÁ] [set idx] -} -result [list A\xC1 -1] +} -result {A 1} test encoding-19.5 {TableFromUtfProc} -body { - list [encoding convertfrom -failindex idx -profile strict ascii A\xC1] [set idx] + list [encoding convertfrom -failindex idx ascii AÁ] [set idx] } -result {A 1} test encoding-19.6 {TableFromUtfProc} -body { - list [encoding convertfrom -failindex idx -profile strict ascii AÁB] [set idx] + list [encoding convertfrom -failindex idx ascii AÁB] [set idx] } -result {A 1} test encoding-20.1 {TableFreefProc} { @@ -766,7 +766,7 @@ test encoding-24.3 {EscapeFreeProc on open channels} {stdio} { } [list 3 "乎乞也 (\\u4E4E\\u4E5E\\u4E5F)"] test encoding-24.4 {Parse valid or invalid utf-8} { - string length [encoding convertfrom utf-8 "\xC0\x80"] + string length [encoding convertfrom -profile tcl8 utf-8 "\xC0\x80"] } 1 test encoding-24.5 {Parse valid or invalid utf-8} { string length [encoding convertfrom -profile tcl8 utf-8 "\xC0\x81"] @@ -790,16 +790,16 @@ test encoding-24.11 {Parse valid or invalid utf-8} { string length [encoding convertfrom -profile tcl8 utf-8 "\xEF\xBF\xBF"] } 1 test encoding-24.12 {Parse valid or invalid utf-8} -body { - encoding convertfrom -profile strict utf-8 "\xC0\x81" + encoding convertfrom utf-8 "\xC0\x81" } -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xC0'} test encoding-24.13 {Parse valid or invalid utf-8} -body { - encoding convertfrom -profile strict utf-8 "\xC1\xBF" + encoding convertfrom utf-8 "\xC1\xBF" } -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xC1'} test encoding-24.14 {Parse valid or invalid utf-8} { string length [encoding convertfrom utf-8 "\xC2\x80"] } 1 test encoding-24.15 {Parse valid or invalid utf-8} -body { - encoding convertfrom utf-8 "Z\xE0\x80" + encoding convertfrom -profile tcl8 utf-8 "Z\xE0\x80" } -result Z\xE0\u20AC test encoding-24.16 {Parse valid or invalid utf-8} -constraints testbytestring -body { encoding convertto utf-8 [testbytestring "Z\u4343\x80"] @@ -810,11 +810,8 @@ test encoding-24.17 {Parse valid or invalid utf-8} -constraints testbytestring - test encoding-24.18 {Parse valid or invalid utf-8} -constraints testbytestring -body { encoding convertto utf-8 [testbytestring "Z\xE0\x80xxxxxx"] } -result "Z\xC3\xA0\xE2\x82\xACxxxxxx" -test encoding-24.19.1 {Parse valid or invalid utf-8} -body { - encoding convertto -profile tcl8 utf-8 "ZX\uD800" -} -result ZX\xED\xA0\x80 -test encoding-24.19.2 {Parse valid or invalid utf-8} -body { - encoding convertto -profile strict utf-8 "ZX\uD800" +test encoding-24.19 {Parse valid or invalid utf-8} -body { + encoding convertto utf-8 "ZX\uD800" } -returnCodes 1 -match glob -result "unexpected character at index 2: 'U+00D800'" test encoding-24.20 {Parse with -profile tcl8 but without providing encoding} -body { encoding convertfrom -profile tcl8 "\x20" @@ -828,26 +825,26 @@ test encoding-24.22 {Syntax error, two encodings} -body { test encoding-24.23 {Syntax error, two encodings} -body { encoding convertto iso8859-1 utf-8 "ZX\uD800" } -result {bad option "iso8859-1": must be -profile or -failindex} -returnCodes error -test encoding-24.24 {Parse invalid utf-8 with -profile strict} -body { - encoding convertfrom -profile strict utf-8 "\xC0\x80\x00\x00" +test encoding-24.24 {Parse invalid utf-8 with} -body { + encoding convertfrom utf-8 "\xC0\x80\x00\x00" } -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xC0'} -test encoding-24.25 {Parse invalid utf-8 with -profile strict} -body { - encoding convertfrom -profile strict utf-8 "\x40\x80\x00\x00" +test encoding-24.25 {Parse invalid utf-8 with} -body { + encoding convertfrom utf-8 "\x40\x80\x00\x00" } -returnCodes 1 -result {unexpected byte sequence starting at index 1: '\x80'} -test encoding-24.26 {Parse valid utf-8 with -profile strict} -body { - encoding convertfrom -profile strict utf-8 "\xF1\x80\x80\x80" +test encoding-24.26 {Parse valid utf-8 with} -body { + encoding convertfrom utf-8 "\xF1\x80\x80\x80" } -result \U40000 -test encoding-24.27 {Parse invalid utf-8 with -profile strict} -body { - encoding convertfrom -profile strict utf-8 "\xF0\x80\x80\x80" +test encoding-24.27 {Parse invalid utf-8 with} -body { + encoding convertfrom utf-8 "\xF0\x80\x80\x80" } -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xF0'} -test encoding-24.28 {Parse invalid utf-8 with -profile strict} -body { - encoding convertfrom -profile strict utf-8 "\xFF\x00\x00" +test encoding-24.28 {Parse invalid utf-8 with} -body { + encoding convertfrom utf-8 "\xFF\x00\x00" } -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xFF'} test encoding-24.29 {Parse invalid utf-8} -body { encoding convertfrom utf-8 \xEF\xBF\xBF } -result \uFFFF -test encoding-24.30 {Parse noncharacter with -profile strict} -body { - encoding convertfrom -profile strict utf-8 \xEF\xBF\xBF +test encoding-24.30 {Parse noncharacter with} -body { + encoding convertfrom utf-8 \xEF\xBF\xBF } -result \uFFFF test encoding-24.31 {Parse invalid utf-8 with -profile tcl8} -body { encoding convertfrom -profile tcl8 utf-8 \xEF\xBF\xBF @@ -855,35 +852,32 @@ test encoding-24.31 {Parse invalid utf-8 with -profile tcl8} -body { test encoding-24.32 {Try to generate invalid utf-8} -body { encoding convertto utf-8 \uFFFF } -result \xEF\xBF\xBF -test encoding-24.33 {Try to generate noncharacter with -profile strict} -body { - encoding convertto -profile strict utf-8 \uFFFF +test encoding-24.33 {Try to generate noncharacter with} -body { + encoding convertto utf-8 \uFFFF } -result \xEF\xBF\xBF test encoding-24.34 {Try to generate invalid utf-8 with -profile tcl8} -body { encoding convertto -profile tcl8 utf-8 \uFFFF } -result \xEF\xBF\xBF test encoding-24.35 {Parse invalid utf-8} -constraints utf32 -body { - encoding convertfrom utf-8 \xED\xA0\x80 + encoding convertfrom -profile tcl8 utf-8 \xED\xA0\x80 } -result \uD800 -test encoding-24.36 {Parse invalid utf-8 with -profile strict} -body { - encoding convertfrom -profile strict utf-8 \xED\xA0\x80 +test encoding-24.36 {Parse invalid utf-8 with} -body { + encoding convertfrom utf-8 \xED\xA0\x80 } -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xED'} test encoding-24.37 {Parse invalid utf-8 with -profile tcl8} -body { encoding convertfrom -profile tcl8 utf-8 \xED\xA0\x80 } -result \uD800 -test encoding-24.38.1 {Try to generate invalid utf-8} -body { - encoding convertto -profile tcl8 utf-8 \uD800 -} -result \xED\xA0\x80 -test encoding-24.38.2 {Try to generate invalid utf-8} -body { - encoding convertto -profile strict utf-8 \uD800 +test encoding-24.38 {Try to generate invalid utf-8} -body { + encoding convertto utf-8 \uD800 } -returnCodes 1 -result {unexpected character at index 0: 'U+00D800'} -test encoding-24.39 {Try to generate invalid utf-8 with -profile strict} -body { - encoding convertto -profile strict utf-8 \uD800 +test encoding-24.39 {Try to generate invalid utf-8 with} -body { + encoding convertto utf-8 \uD800 } -returnCodes 1 -result {unexpected character at index 0: 'U+00D800'} test encoding-24.40 {Try to generate invalid utf-8 with -profile tcl8} -body { encoding convertto -profile tcl8 utf-8 \uD800 } -result \xED\xA0\x80 -test encoding-24.41 {Parse invalid utf-8 with -profile strict} -body { - encoding convertfrom -profile strict utf-8 \xED\xA0\x80\xED\xB0\x80 +test encoding-24.41 {Parse invalid utf-8 with} -body { + encoding convertfrom utf-8 \xED\xA0\x80\xED\xB0\x80 } -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xED'} test encoding-24.42 {Parse invalid utf-8, fallback to cp1252 [885c86a9a0]} -body { encoding convertfrom -profile tcl8 utf-8 \xF0\x80\x80\x80 @@ -891,11 +885,11 @@ test encoding-24.42 {Parse invalid utf-8, fallback to cp1252 [885c86a9a0]} -body test encoding-24.43 {Parse invalid utf-8, fallback to cp1252 [885c86a9a0]} -body { encoding convertfrom -profile tcl8 utf-8 \x80 } -result \u20AC -test encoding-24.44 {Try to generate invalid ucs-2 with -profile strict} -body { - encoding convertto -profile strict ucs-2 \uD800 +test encoding-24.44 {Try to generate invalid ucs-2 with} -body { + encoding convertto ucs-2 \uD800 } -returnCodes 1 -result {unexpected character at index 0: 'U+00D800'} -test encoding-24.45 {Try to generate invalid ucs-2 with -profile strict} -body { - encoding convertto -profile strict ucs-2 \U10000 +test encoding-24.45 {Try to generate invalid ucs-2 with} -body { + encoding convertto ucs-2 \U10000 } -returnCodes 1 -result {unexpected character at index 0: 'U+010000'} file delete [file join [temporaryDirectory] iso2022.txt] diff --git a/tests/encodingVectors.tcl b/tests/encodingVectors.tcl index 1b569a1..2f97d1f 100644 --- a/tests/encodingVectors.tcl +++ b/tests/encodingVectors.tcl @@ -10,7 +10,7 @@ # List of defined encoding profiles set encProfiles {tcl8 strict replace} -set encDefaultProfile tcl8; # Should reflect the default from implementation +set encDefaultProfile strict; # Should reflect the default from implementation # encValidStrings - Table of valid strings. # diff --git a/tests/io.test b/tests/io.test index 96e5ea6..2f2adc3 100644 --- a/tests/io.test +++ b/tests/io.test @@ -1195,7 +1195,7 @@ test io-7.3 {FilterInputBytes: split up character at EOF} {testchannel} { puts -nonewline $f "1234567890123\x82\x4F\x82\x50\x82" close $f set f [open $path(test1)] - fconfigure $f -encoding shiftjis + fconfigure $f -encoding shiftjis -profile tcl8 set x [list [gets $f line] $line] lappend x [tell $f] [testchannel inputbuffered $f] [eof $f] lappend x [gets $f line] $line @@ -1620,7 +1620,7 @@ test io-12.9 {ReadChars: multibyte chars split} -body { puts -nonewline $f [string repeat a 9]\xC2 close $f set f [open $path(test1)] - fconfigure $f -encoding utf-8 -buffersize 10 + fconfigure $f -encoding utf-8 -profile tcl8 -buffersize 10 set in [read $f] close $f scan [string index $in end] %c @@ -1633,7 +1633,7 @@ test io-12.10 {ReadChars: multibyte chars split} -body { puts -nonewline $f [string repeat a 9]\xC2 close $f set f [open $path(test1)] - fconfigure $f -encoding utf-8 -buffersize 11 + fconfigure $f -encoding utf-8 -profile tcl8 -buffersize 11 set in [read $f] close $f scan [string index $in end] %c @@ -7926,6 +7926,8 @@ test io-53.5 {CopyData: error during fcopy} {socket fcopy} { set out [socket 127.0.0.1 [lindex [fconfigure $listen -sockname] 2]] catch {unset fcopyTestDone} close $listen ;# This means the socket open never really succeeds + fconfigure $in -encoding utf-8 + fconfigure $out -encoding utf-8 fcopy $in $out -command [namespace code FcopyTestDone] variable fcopyTestDone if {![info exists fcopyTestDone]} { @@ -9123,7 +9125,7 @@ test io-74.1 {[104f2885bb] improper cache validity check} -setup { removeFile io-74.1 } -returnCodes error -match glob -result {can not find channel named "*"} -test io-75.1 {multibyte encoding error read results in raw bytes (-nocomplainencoding 1)} -setup { +test io-75.1 {multibyte encoding error read results in raw bytes (-profile tcl8)} -setup { set fn [makeFile {} io-75.1] set f [open $fn w+] fconfigure $f -encoding binary @@ -9214,7 +9216,7 @@ test io-75.5 {invalid utf-8 encoding read is ignored (-profile tcl8)} -setup { removeFile io-75.5 } -result 4181 -test io-75.6 {invalid utf-8 encoding gets is not ignored (-profile strict)} -setup { +test io-75.6 {invalid utf-8 encoding read is not ignored (-profile strict)} -setup { set fn [makeFile {} io-75.6] set f [open $fn w+] fconfigure $f -encoding binary @@ -9224,13 +9226,18 @@ test io-75.6 {invalid utf-8 encoding gets is not ignored (-profile strict)} -set seek $f 0 fconfigure $f -encoding utf-8 -buffering none -eofchar "" -translation lf -profile strict } -body { - gets $f + try { + read $f + } on error {result options} { + set data [dict get $options -data] + } + lappend data $result } -cleanup { close $f removeFile io-75.6 -} -match glob -returnCodes 1 -result {error reading "*": invalid or incomplete multibyte or wide character} +} -match glob -result {A {error reading "*": invalid or incomplete multibyte or wide character}} -test io-75.7 {invalid utf-8 encoding gets is not ignored (-profile strict)} -setup { +test io-75.7 {invalid utf-8 encoding eof handling (-profile strict)} -setup { set fn [makeFile {} io-75.7] set f [open $fn w+] fconfigure $f -encoding binary @@ -9240,11 +9247,18 @@ test io-75.7 {invalid utf-8 encoding gets is not ignored (-profile strict)} -set seek $f 0 fconfigure $f -encoding utf-8 -buffering none -eofchar "" -translation lf -profile strict } -body { - read $f + try { + read $f + } on error {result options} { + set data [dict get $options -data] + } + lappend data $result + fconfigure $f -encoding iso8859-1 + lappend data [read $f] } -cleanup { close $f removeFile io-75.7 -} -match glob -returnCodes 1 -result {error reading "*": invalid or incomplete multibyte or wide character} +} -match glob -result "A {error reading \"*\": invalid or incomplete multibyte or wide character} \x81" test io-75.8 {invalid utf-8 encoding eof handling (-profile strict)} -setup { set fn [makeFile {} io-75.8] @@ -9290,7 +9304,7 @@ test io-75.10 {incomplete multibyte encoding read is ignored} -setup { puts -nonewline $f A\xC0 flush $f seek $f 0 - fconfigure $f -encoding utf-8 -buffering none + fconfigure $f -encoding utf-8 -profile tcl8 -buffering none } -body { set d [read $f] binary scan $d H* hd @@ -9313,16 +9327,13 @@ test io-75.11 {shiftjis encoding error read results in raw bytes} -setup { puts -nonewline $f A\x81\xFFA flush $f seek $f 0 - fconfigure $f -encoding shiftjis -blocking 0 -eofchar "" -translation lf -profile strict + fconfigure $f -encoding shiftjis -buffering none -eofchar "" -translation lf -profile strict } -body { - set d [read $f] - binary scan $d H* hd - lappend hd [catch {set d [read $f]} msg] - lappend hd $msg + read $f } -cleanup { close $f removeFile io-75.11 -} -match glob -result {41 1 {error reading "*": invalid or incomplete multibyte or wide character}} +} -match glob -returnCodes 1 -result {error reading "*": invalid or incomplete multibyte or wide character} test io-75.12 {invalid utf-8 encoding read is ignored} -setup { set fn [makeFile {} io-75.12] @@ -9331,7 +9342,7 @@ test io-75.12 {invalid utf-8 encoding read is ignored} -setup { puts -nonewline $f A\x81 flush $f seek $f 0 - fconfigure $f -encoding utf-8 -buffering none -eofchar "" -translation lf + fconfigure $f -encoding utf-8 -profile tcl8 -buffering none -eofchar "" -translation lf } -body { set d [read $f] binary scan $d H* hd @@ -9348,16 +9359,13 @@ test io-75.13 {invalid utf-8 encoding read is not ignored (-profile strict)} -se puts -nonewline $f "A\x81" flush $f seek $f 0 - fconfigure $f -encoding utf-8 -blocking 0 -eofchar "" -translation lf -profile strict + fconfigure $f -encoding utf-8 -buffering none -eofchar "" -translation lf -profile strict } -body { - set d [read $f] - binary scan $d H* hd - lappend hd [catch {read $f} msg] - lappend hd $msg + read $f } -cleanup { close $f removeFile io-75.13 -} -match glob -result {41 1 {error reading "*": invalid or incomplete multibyte or wide character}} +} -match glob -returnCodes 1 -result {error reading "*": invalid or incomplete multibyte or wide character} # ### ### ### ######### ######### ######### diff --git a/tests/ioCmd.test b/tests/ioCmd.test index 89c6e76..82b3250 100644 --- a/tests/ioCmd.test +++ b/tests/ioCmd.test @@ -240,11 +240,11 @@ test iocmd-8.7 {fconfigure command} -setup { file delete $path(test1) } -body { set f1 [open $path(test1) w] - fconfigure $f1 -translation lf -eofchar {} -encoding utf-16 -profile tcl8 + fconfigure $f1 -translation lf -eofchar {} -encoding utf-16 fconfigure $f1 } -cleanup { catch {close $f1} -} -result {-blocking 1 -buffering full -buffersize 4096 -encoding utf-16 -eofchar {} -profile tcl8 -translation lf} +} -result {-blocking 1 -buffering full -buffersize 4096 -encoding utf-16 -eofchar {} -profile strict -translation lf} test iocmd-8.8 {fconfigure command} -setup { file delete $path(test1) set x {} @@ -262,11 +262,11 @@ test iocmd-8.9 {fconfigure command} -setup { } -body { set f1 [open $path(test1) w] fconfigure $f1 -translation binary -buffering none -buffersize 4040 \ - -eofchar {} -encoding binary -profile tcl8 + -eofchar {} -encoding binary fconfigure $f1 } -cleanup { catch {close $f1} -} -result {-blocking 1 -buffering none -buffersize 4040 -encoding binary -eofchar {} -profile tcl8 -translation lf} +} -result {-blocking 1 -buffering none -buffersize 4040 -encoding binary -eofchar {} -profile strict -translation lf} test iocmd-8.10 {fconfigure command} -returnCodes error -body { fconfigure a b } -result {can not find channel named "a"} diff --git a/tests/utfext.test b/tests/utfext.test index b980800..de26b6f 100644 --- a/tests/utfext.test +++ b/tests/utfext.test @@ -84,7 +84,7 @@ foreach {enc utfhex hex} $utfExtMap { } # Test for insufficient space -test xx-bufferoverflow {buffer overflow Tcl_ExternalToUtf} -body { +test xx-bufferoverflow {buffer overflow Tcl_ExternalToUtf} -constraints knownBug -body { testencoding Tcl_UtfToExternal utf-16 A {start end} {} 1 } -result [list nospace {} \xFF] diff --git a/tests/winConsole.test b/tests/winConsole.test index f030444..4eccf81 100644 --- a/tests/winConsole.test +++ b/tests/winConsole.test @@ -198,7 +198,7 @@ test console-fconfigure-get-1.0 { Console get stdin configuration } -constraints {win interactive} -body { lsort [dict keys [fconfigure stdin]] -} -result {-blocking -buffering -buffersize -encoding -eofchar -inputmode -profile -translation} +} -result {-blocking -buffering -buffersize -encoding -eofchar -profile -inputmode -translation} set testnum 0 foreach {opt result} { @@ -344,7 +344,7 @@ test console-fconfigure-set-3.0 { fconfigure stderr -winsize } -constraints {win interactive} -body { fconfigure stderr -winsize {10 30} -} -result {bad option "-winsize": should be one of -blocking, -buffering, -buffersize, -encoding, -eofchar, or -profile, -translation} -returnCodes error +} -result {bad option "-winsize": should be one of -blocking, -buffering, -buffersize, -encoding, -eofchar, -profile, or -translation} -returnCodes error # Multiple threads diff --git a/tests/zlib.test b/tests/zlib.test index 720fdd6..544e6d4 100644 --- a/tests/zlib.test +++ b/tests/zlib.test @@ -292,7 +292,7 @@ test zlib-8.6 {transformation and fconfigure} -setup { } -cleanup { catch {close $fd} removeFile $file -} -result {{-blocking 1 -buffering full -buffersize 4096 -encoding binary -eofchar {} -profile tcl8 -translation lf} {-blocking 1 -buffering full -buffersize 4096 -encoding binary -eofchar {} -profile tcl8 -translation lf -checksum 1 -dictionary {}} {-blocking 1 -buffering full -buffersize 4096 -encoding binary -eofchar {} -profile tcl8 -translation lf}} +} -result {{-blocking 1 -buffering full -buffersize 4096 -encoding binary -eofchar {} -profile strict -translation lf} {-blocking 1 -buffering full -buffersize 4096 -encoding binary -eofchar {} -profile strict -translation lf -checksum 1 -dictionary {}} {-blocking 1 -buffering full -buffersize 4096 -encoding binary -eofchar {} -profile strict -translation lf}} test zlib-8.7 {transformation and fconfigure} -setup { set file [makeFile {} test.gz] set fd [open $file wb] @@ -302,7 +302,7 @@ test zlib-8.7 {transformation and fconfigure} -setup { } -cleanup { catch {close $fd} removeFile $file -} -result {{-blocking 1 -buffering full -buffersize 4096 -encoding binary -eofchar {} -profile tcl8 -translation lf} {-blocking 1 -buffering full -buffersize 4096 -encoding binary -eofchar {} -profile tcl8 -translation lf -checksum 0} {-blocking 1 -buffering full -buffersize 4096 -encoding binary -eofchar {} -profile tcl8 -translation lf}} +} -result {{-blocking 1 -buffering full -buffersize 4096 -encoding binary -eofchar {} -profile strict -translation lf} {-blocking 1 -buffering full -buffersize 4096 -encoding binary -eofchar {} -profile strict -translation lf -checksum 0} {-blocking 1 -buffering full -buffersize 4096 -encoding binary -eofchar {} -profile strict -translation lf}} # Input is headers from fetching SPDY draft # Dictionary is that which is proposed _in_ SPDY draft set spdyHeaders "HTTP/1.0 200 OK\r\nContent-Type: text/html; charset=utf-8\r\nX-Robots-Tag: noarchive\r\nLast-Modified: Tue, 05 Jun 2012 02:43:25 GMT\r\nETag: \"1338864205129|#public|0|en|||0\"\r\nExpires: Tue, 05 Jun 2012 16:17:11 GMT\r\nDate: Tue, 05 Jun 2012 16:17:06 GMT\r\nCache-Control: public, max-age=5\r\nX-Content-Type-Options: nosniff\r\nX-XSS-Protection: 1; mode=block\r\nServer: GSE\r\n" |