From 91c305a5f3924fdd07b574ce025113cec013fd06 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Fri, 5 May 2023 06:18:55 +0000 Subject: Remove more ... to be split off in separate TIP's --- doc/OpenFileChnl.3 | 4 -- generic/tclCmdAH.c | 4 -- generic/tclIO.c | 103 +++++++++++++++++----------------------------------- generic/tclIOCmd.c | 15 ++------ generic/tclInt.h | 1 - tests/encoding.test | 43 +++++++++++++++------- 6 files changed, 66 insertions(+), 104 deletions(-) diff --git a/doc/OpenFileChnl.3 b/doc/OpenFileChnl.3 index 4f407b6..3a7b6ae 100644 --- a/doc/OpenFileChnl.3 +++ b/doc/OpenFileChnl.3 @@ -482,10 +482,6 @@ end-of-line character. When -1 is returned, the \fBTcl_InputBlocked\fR procedure may be invoked to determine if the channel is blocked because of input unavailability. .PP -If the channel is in blocking mode, it might be that there is data available -but - at the same time - an encoding error occurred. In that case, the -POSIX error EILSEQ will be recorded. -.PP \fBTcl_Gets\fR is the same as \fBTcl_GetsObj\fR except the resulting characters are appended to the dynamic string given by \fIlineRead\fR rather than a Tcl value. diff --git a/generic/tclCmdAH.c b/generic/tclCmdAH.c index e8eb26a..ae1ba33 100644 --- a/generic/tclCmdAH.c +++ b/generic/tclCmdAH.c @@ -435,11 +435,7 @@ EncodingConvertParseOptions ( Tcl_Encoding encoding; Tcl_Obj *dataObj; Tcl_Obj *failVarObj; -#if TCL_MAJOR_VERSION > 8 || defined(TCL_NO_DEPRECATED) int profile = TCL_ENCODING_PROFILE_STRICT; -#else - int profile = TCL_ENCODING_PROFILE_TCL8; -#endif /* * Possible combinations: diff --git a/generic/tclIO.c b/generic/tclIO.c index b7282c9..fb399d4 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -223,8 +223,8 @@ static void StopCopy(CopyState *csPtr); static void TranslateInputEOL(ChannelState *statePtr, char *dst, const char *src, int *dstLenPtr, int *srcLenPtr); static void UpdateInterest(Channel *chanPtr); -static int Write(Channel *chanPtr, const char *src, - int srcLen, Tcl_Encoding encoding); +static Tcl_Size Write(Channel *chanPtr, const char *src, + Tcl_Size srcLen, Tcl_Encoding encoding); static Tcl_Obj * FixLevelCode(Tcl_Obj *msg); static void SpliceChannel(Tcl_Channel chan); static void CutChannel(Tcl_Channel chan); @@ -4189,6 +4189,7 @@ Tcl_WriteChars( } objPtr = Tcl_NewStringObj(src, len); + Tcl_IncrRefCount(objPtr); src = (char *) Tcl_GetByteArrayFromObj(objPtr, &len); if (src == NULL) { Tcl_SetErrno(EILSEQ); @@ -4237,7 +4238,7 @@ Tcl_WriteObj( Channel *chanPtr; ChannelState *statePtr; /* State info for channel */ const char *src; - Tcl_Size srcLen; + Tcl_Size srcLen = 0; statePtr = ((Channel *) chan)->state; chanPtr = statePtr->topChanPtr; @@ -4246,31 +4247,20 @@ Tcl_WriteObj( return TCL_INDEX_NONE; } if (statePtr->encoding == NULL) { + Tcl_Size result; + src = (char *) Tcl_GetByteArrayFromObj(objPtr, &srcLen); if (src == NULL) { Tcl_SetErrno(EILSEQ); - return TCL_INDEX_NONE; + result = TCL_INDEX_NONE; + } else { + result = WriteBytes(chanPtr, src, srcLen); } + return result; } else { src = Tcl_GetStringFromObj(objPtr, &srcLen); + return WriteChars(chanPtr, src, srcLen); } - - size_t totalWritten = 0; - /* - * Note original code always called WriteChars even if srcLen 0 - * so we will too. - */ - do { - int chunkSize = srcLen > INT_MAX ? INT_MAX : srcLen; - int written; - written = WriteChars(chanPtr, src, chunkSize); - if (written < 0) { - return TCL_INDEX_NONE; - } - totalWritten += written; - srcLen -= chunkSize; - } while (srcLen); - return totalWritten; } static void @@ -4341,17 +4331,18 @@ WillRead( *---------------------------------------------------------------------- */ -static int +static Tcl_Size Write( Channel *chanPtr, /* The channel to buffer output for. */ const char *src, /* UTF-8 string to write. */ - int srcLen, /* Length of UTF-8 string in bytes. */ + Tcl_Size srcLen, /* Length of UTF-8 string in bytes. */ Tcl_Encoding encoding) { ChannelState *statePtr = chanPtr->state; /* State info for channel */ char *nextNewLine = NULL; - int endEncoding, saved = 0, total = 0, flushed = 0, needNlFlush = 0; + int endEncoding, needNlFlush = 0; + Tcl_Size saved = 0, total = 0, flushed = 0; char safe[BUFFER_PADDING]; int encodingError = 0; @@ -4364,7 +4355,6 @@ Write( */ endEncoding = ((statePtr->outputEncodingFlags & TCL_ENCODING_END) != 0); - if (GotFlag(statePtr, CHANNEL_LINEBUFFERED) || (statePtr->outputTranslation != TCL_TRANSLATE_LF)) { nextNewLine = (char *)memchr(src, '\n', srcLen); @@ -4373,7 +4363,8 @@ Write( while (srcLen + saved + endEncoding > 0 && !encodingError) { ChannelBuffer *bufPtr; char *dst; - int result, srcRead, dstLen, dstWrote, srcLimit = srcLen; + int result, srcRead, dstLen, dstWrote; + Tcl_Size srcLimit = srcLen; if (nextNewLine) { srcLimit = nextNewLine - src; @@ -4604,8 +4595,8 @@ Tcl_GetsObj( ChannelState *statePtr = chanPtr->state; /* State info for channel */ ChannelBuffer *bufPtr; - int inEofChar, skip, copiedTotal, oldFlags; - Tcl_Size oldLength, oldRemoved; + int inEofChar, skip, copiedTotal, oldFlags, oldRemoved; + Tcl_Size oldLength; Tcl_Encoding encoding; char *dst, *dstEnd, *eol, *eof; Tcl_EncodingState oldState; @@ -4995,13 +4986,11 @@ Tcl_GetsObj( UpdateInterest(chanPtr); TclChannelRelease((Tcl_Channel)chanPtr); if (GotFlag(statePtr, CHANNEL_ENCODING_ERROR) && gs.bytesWrote == 0) { - if (bufPtr->nextRemoved != oldRemoved) { - bufPtr->nextRemoved = oldRemoved; - ResetFlag(statePtr, CHANNEL_ENCODING_ERROR); - } + bufPtr->nextRemoved = oldRemoved; Tcl_SetErrno(EILSEQ); copiedTotal = -1; } + ResetFlag(statePtr, CHANNEL_ENCODING_ERROR); return copiedTotal; } @@ -5463,7 +5452,8 @@ FilterInputBytes( if (result == TCL_CONVERT_UNKNOWN || result == TCL_CONVERT_SYNTAX) { SetFlag(statePtr, CHANNEL_ENCODING_ERROR); - ResetFlag(statePtr, CHANNEL_EOF|CHANNEL_STICKY_EOF); + ResetFlag(statePtr, CHANNEL_STICKY_EOF); + ResetFlag(statePtr, CHANNEL_EOF); result = TCL_OK; } @@ -5931,14 +5921,15 @@ DoReadChars( /* State info for channel */ ChannelBuffer *bufPtr; Tcl_Size copied; - int result, copiedNow; + int result; Tcl_Encoding encoding = statePtr->encoding; int binaryMode; #define UTF_EXPANSION_FACTOR 1024 int factor = UTF_EXPANSION_FACTOR; if (GotFlag(statePtr, CHANNEL_ENCODING_ERROR)) { - /* TODO: We don't need this call? */ + ResetFlag(statePtr, CHANNEL_EOF|CHANNEL_ENCODING_ERROR); + /* TODO: UpdateInterest not needed here? */ UpdateInterest(chanPtr); Tcl_SetErrno(EILSEQ); return -1; @@ -5955,7 +5946,7 @@ DoReadChars( assert(statePtr->inputEncodingFlags & TCL_ENCODING_END); assert(!GotFlag(statePtr, CHANNEL_BLOCKED|INPUT_SAW_CR)); - /* TODO: We don't need this call? */ + /* TODO: UpdateInterest not needed here? */ UpdateInterest(chanPtr); return 0; } @@ -5969,7 +5960,7 @@ DoReadChars( } ResetFlag(statePtr, CHANNEL_BLOCKED|CHANNEL_EOF); statePtr->inputEncodingFlags &= ~TCL_ENCODING_END; - /* TODO: We don't need this call? */ + /* TODO: UpdateInterest not needed here? */ UpdateInterest(chanPtr); return 0; } @@ -6007,7 +5998,7 @@ DoReadChars( ResetFlag(statePtr, CHANNEL_BLOCKED|CHANNEL_EOF); statePtr->inputEncodingFlags &= ~TCL_ENCODING_END; for (copied = 0; toRead > 0 || toRead == TCL_INDEX_NONE; ) { - copiedNow = -1; + int copiedNow = -1; if (statePtr->inQueueHead != NULL) { if (binaryMode) { copiedNow = ReadBytes(statePtr, objPtr, toRead); @@ -6016,7 +6007,7 @@ DoReadChars( } /* - * If the current buffer is empty recycle it. + * Recycle current buffer if empty. */ bufPtr = statePtr->inQueueHead; @@ -6114,10 +6105,9 @@ finish: * succesfully red before the error. Return an error so that callers * like [read] can also return an error. */ + ResetFlag(statePtr, CHANNEL_EOF|CHANNEL_ENCODING_ERROR); Tcl_SetErrno(EILSEQ); - if (!copied) { - copied = -1; - } + copied = -1; } TclChannelRelease((Tcl_Channel)chanPtr); return copied; @@ -7608,32 +7598,6 @@ Tcl_InputBuffered( return bytesBuffered; } - -/* - *---------------------------------------------------------------------- - * - * TclInputEncodingError -- - * - * Returns 1 if input is in an encoding error position, 0 otherwise. - * - * Results: - * 0 or 1, always. - * - * Side effects: - * None. - * - *---------------------------------------------------------------------- - */ - -int -TclInputEncodingError( - Tcl_Channel chan) /* Is this channel blocked? */ -{ - ChannelState *statePtr = ((Channel *) chan)->state; - /* State of real channel structure. */ - - return GotFlag(statePtr, CHANNEL_ENCODING_ERROR) ? 1 : 0; -} /* *---------------------------------------------------------------------- @@ -10014,8 +9978,7 @@ CopyData( * - EOF is reached on the channel; or * - the channel is non-blocking, and we've read all we can * without blocking. - * - a channel reading error occurs (and we return TCL_INDEX_NONE - * or - in case of encoding error - the data so far) + * - a channel reading error occurs (and we return TCL_INDEX_NONE) * * Side effects: * May cause input to be buffered. diff --git a/generic/tclIOCmd.c b/generic/tclIOCmd.c index 4cf4631..93c50ec 100644 --- a/generic/tclIOCmd.c +++ b/generic/tclIOCmd.c @@ -304,7 +304,7 @@ Tcl_GetsObjCmd( TclChannelPreserve(chan); TclNewObj(linePtr); lineLen = Tcl_GetsObj(chan, linePtr); - if (lineLen == TCL_INDEX_NONE) { + if (lineLen == TCL_IO_FAILURE) { if (!Tcl_Eof(chan) && !Tcl_InputBlocked(chan)) { Tcl_DecrRefCount(linePtr); @@ -323,7 +323,7 @@ Tcl_GetsObjCmd( code = TCL_ERROR; goto done; } - lineLen = TCL_INDEX_NONE; + lineLen = TCL_IO_FAILURE; } if (objc == 3) { if (Tcl_ObjSetVar2(interp, objv[2], NULL, linePtr, @@ -432,7 +432,7 @@ Tcl_ReadObjCmd( TclNewObj(resultPtr); TclChannelPreserve(chan); charactersRead = Tcl_ReadChars(chan, resultPtr, toRead, 0); - if (charactersRead == TCL_INDEX_NONE) { + if (charactersRead == TCL_IO_FAILURE) { Tcl_DecrRefCount(resultPtr); /* * TIP #219. @@ -446,15 +446,6 @@ Tcl_ReadObjCmd( "error reading \"%s\": %s", TclGetString(chanObjPtr), Tcl_PosixError(interp))); } - goto readError; - } else if (TclInputEncodingError(chan)) { - Tcl_Obj *returnOpts = Tcl_NewDictObj(); - Tcl_DictObjPut(NULL, returnOpts, Tcl_NewStringObj("-data", TCL_INDEX_NONE), resultPtr); - Tcl_SetObjResult(interp, Tcl_ObjPrintf( - "error reading \"%s\": %s", - TclGetString(chanObjPtr), Tcl_PosixError(interp))); - Tcl_SetReturnOptions(interp, returnOpts); - readError: TclChannelRelease(chan); return TCL_ERROR; } diff --git a/generic/tclInt.h b/generic/tclInt.h index 03d3e22..436384e 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -3224,7 +3224,6 @@ MODULE_SCOPE void TclInitNamespaceSubsystem(void); MODULE_SCOPE void TclInitNotifier(void); MODULE_SCOPE void TclInitObjSubsystem(void); MODULE_SCOPE int TclInterpReady(Tcl_Interp *interp); -MODULE_SCOPE int TclInputEncodingError(Tcl_Channel chan); MODULE_SCOPE int TclIsDigitProc(int byte); MODULE_SCOPE int TclIsBareword(int byte); MODULE_SCOPE Tcl_Obj * TclJoinPath(Tcl_Size elements, Tcl_Obj * const objv[], diff --git a/tests/encoding.test b/tests/encoding.test index 17bf6f5..506ab2c 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -464,7 +464,10 @@ test encoding-15.24 {UtfToUtfProc CESU-8 bug [048dd20b4171c8da]} { test encoding-15.25 {UtfToUtfProc CESU-8} { encoding convertfrom cesu-8 \x00 } \x00 -test encoding-15.26 {UtfToUtfProc CESU-8} { +test {encoding-15.26 cesu-8 tclnull strict} {UtfToUtfProc CESU-8} { + encoding convertfrom -profile tcl8 cesu-8 \xC0\x80 +} \x00 +test {encoding-15.26 cesu-8 tclnull tcl8} {UtfToUtfProc CESU-8} { encoding convertfrom -profile tcl8 cesu-8 \xC0\x80 } \x00 test encoding-15.27 {UtfToUtfProc -profile strict CESU-8} { @@ -562,24 +565,35 @@ test encoding-16.18 { return done } [namespace current]] } -result done -test encoding-16.19 {Utf16ToUtfProc, bug [d19fe0a5b]} -body { +test {encoding-16.19 strict} {Utf16ToUtfProc, bug [d19fe0a5b]} -body { + encoding convertfrom -profile strict utf-16 "\x41\x41\x41" +} -returnCodes 1 -result {unexpected byte sequence starting at index 2: '\x41'} +test {encoding-16.19 tcl8} {Utf16ToUtfProc, bug [d19fe0a5b]} -body { encoding convertfrom -profile tcl8 utf-16 "\x41\x41\x41" } -result \u4141\uFFFD -test encoding-16.20 {Utf16ToUtfProc, bug [d19fe0a5b]} -constraints deprecated -body { +test encoding-16.20 {utf16ToUtfProc, bug [d19fe0a5b]} \ + -constraints deprecated -body { encoding convertfrom utf-16 "\xD8\xD8" } -result \uD8D8 -test encoding-16.21 {Utf32ToUtfProc, bug [d19fe0a5b]} -body { +test encoding-16.21.tcl8 {Utf32ToUtfProc, bug [d19fe0a5b]} -body { encoding convertfrom -profile tcl8 utf-32 "\x00\x00\x00\x00\x41\x41" } -result \x00\uFFFD +test encoding-16.21.strict {Utf32ToUtfProc, bug [d19fe0a5b]} -body { + encoding convertfrom -profile strict utf-32 "\x00\x00\x00\x00\x41\x41" +} -returnCodes 1 -result {unexpected byte sequence starting at index 4: '\x41'} + test encoding-16.22 {Utf16ToUtfProc, strict, bug [db7a085bd9]} -body { encoding convertfrom -profile strict utf-16le \x00\xD8 } -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\x00'} test encoding-16.23 {Utf16ToUtfProc, strict, bug [db7a085bd9]} -body { encoding convertfrom -profile strict utf-16le \x00\xDC } -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\x00'} -test encoding-16.24 {Utf32ToUtfProc} -body { - encoding convertfrom -profile tcl8 utf-32 "\xFF\xFF\xFF\xFF" -} -result \uFFFD +test {encoding-24.4 utf-8 invalid strict} {Parse invalid utf-8, strict} -body { + string length [encoding convertfrom -profile strict utf-8 "\xC0\x80"] +} -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xC0'} +test {encoding-24.4 utf-8 invalid tcl8} {UtfToUtfProc utf-8} { + encoding convertfrom -profile tcl8 utf-8 \xC0\x80 +} \x00 test encoding-16.25 {Utf32ToUtfProc} -body { encoding convertfrom -profile tcl8 utf-32 "\x01\x00\x00\x01" } -result \uFFFD @@ -789,16 +803,19 @@ test encoding-24.10 {Parse valid or invalid utf-8} { test encoding-24.11 {Parse valid or invalid utf-8} { string length [encoding convertfrom -profile tcl8 utf-8 "\xEF\xBF\xBF"] } 1 -test encoding-24.12 {Parse valid or invalid utf-8} -body { +test encoding-24.12 {Parse invalid utf-8} -body { encoding convertfrom -profile strict utf-8 "\xC0\x81" } -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xC0'} -test encoding-24.13 {Parse valid or invalid utf-8} -body { +test encoding-24.13 {Parse invalid utf-8} -body { encoding convertfrom -profile strict utf-8 "\xC1\xBF" } -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xC1'} -test encoding-24.14 {Parse valid or invalid utf-8} { - string length [encoding convertfrom utf-8 "\xC2\x80"] +test encoding-24.14 {Parse valid utf-8} { + expr {[encoding convertfrom utf-8 "\xC2\x80"] eq "\u80"} } 1 -test encoding-24.15 {Parse valid or invalid utf-8} -body { +test encoding-24.15.strict {Parse invalid utf-8, -profile strict} -body { + encoding convertfrom -profile strict utf-8 "Z\xE0\x80" +} -returnCodes 1 -result "unexpected byte sequence starting at index 1: '\\xE0'" +test encoding-24.15.tcl8 {Parse invalid utf-8, -profile tcl8} -body { encoding convertfrom -profile tcl8 utf-8 "Z\xE0\x80" } -result Z\xE0\u20AC test encoding-24.16 {Parse valid or invalid utf-8} -constraints testbytestring -body { @@ -855,7 +872,7 @@ test encoding-24.31 {Parse invalid utf-8 with -profile tcl8} -body { test encoding-24.32 {Try to generate invalid utf-8} -body { encoding convertto utf-8 \uFFFF } -result \xEF\xBF\xBF -test encoding-24.33 {Try to generate noncharacter with -profile strict} -body { +test encoding-24.33 {Try to generate invalid utf-8} -body { encoding convertto -profile strict utf-8 \uFFFF } -result \xEF\xBF\xBF test encoding-24.34 {Try to generate invalid utf-8 with -profile tcl8} -body { -- cgit v0.12