From f562be24c35d2baff59412a04ebc4df604709e5e Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Tue, 7 Mar 2023 21:01:32 +0000 Subject: Start implementing TIP #657. WIP --- doc/Encoding.3 | 13 ++------- generic/tcl.h | 25 ++++++---------- generic/tclCmdAH.c | 2 +- generic/tclEncoding.c | 72 ++--------------------------------------------- generic/tclIO.c | 14 ++------- generic/tclIO.h | 2 -- generic/tclInt.h | 1 - tests/encoding.test | 32 ++++++++++----------- tests/encodingVectors.tcl | 2 +- 9 files changed, 35 insertions(+), 128 deletions(-) diff --git a/doc/Encoding.3 b/doc/Encoding.3 index 7b5e9d4..93f389a 100644 --- a/doc/Encoding.3 +++ b/doc/Encoding.3 @@ -102,15 +102,8 @@ converted. \fBTCL_ENCODING_END\fR signifies that the source buffer is the last block in a (potentially multi-block) input stream, telling the conversion routine to perform any finalization that needs to occur after the last byte is converted and then to reset to an initial state. -\fBTCL_ENCODING_NOCOMPLAIN\fR signifies that the conversion routine should -not return immediately upon reading a source character that does not exist in -the target encoding, but it will substitute a default fallback character for -all of such characters. The flag \fBTCL_ENCODING_STOPONERROR\fR has no effect, -it only has meaning in Tcl 8.x. The flag \fBTCL_ENCODING_STRICT\fR makes the -encoder/decoder more strict in what it considers to be an invalid byte -sequence. The flag \fBTCL_ENCODING_MODIFIED\fR makes -\fBTcl_UtfToExternalDStringEx\fR and \fBTcl_UtfToExternal\fR produce the byte -sequence \exC0\ex80 in stead of \ex00, for the utf-8/cesu-8 encoders. +The flag \fBTCL_ENCODING_STOPONERROR\fR has no effect, +it only has meaning in Tcl 8.x. .AP Tcl_EncodingState *statePtr in/out Used when converting a (generally long or indefinite length) byte stream in a piece-by-piece fashion. The conversion routine stores its current @@ -241,7 +234,7 @@ if the input stream has been damaged or if the input encoding method was misidentified. .IP \fBTCL_CONVERT_UNKNOWN\fR 29 The source buffer contained a character that could not be represented in -the target encoding and \fBTCL_ENCODING_NOCOMPLAIN\fR was not specified. +the target encoding. .RE .LP \fBTcl_UtfToExternalDString\fR converts a source buffer \fIsrc\fR from UTF-8 diff --git a/generic/tcl.h b/generic/tcl.h index fd02ccc..2713966 100644 --- a/generic/tcl.h +++ b/generic/tcl.h @@ -1927,8 +1927,6 @@ typedef struct Tcl_EncodingType { * reset to an initial state. If the source * buffer contains the entire input stream to be * converted, this flag should be set. - * TCL_ENCODING_STRICT - Be more strict in accepting what - * is considered a 'invalid byte sequence'. * TCL_ENCODING_STOPONERROR - Not used any more. * TCL_ENCODING_NO_TERMINATE - If set, Tcl_ExternalToUtf does not append a * terminating NUL byte. Since it does not need @@ -1955,10 +1953,8 @@ typedef struct Tcl_EncodingType { #define TCL_ENCODING_START 0x01 #define TCL_ENCODING_END 0x02 #if TCL_MAJOR_VERSION > 8 -# define TCL_ENCODING_STRICT 0x04 # define TCL_ENCODING_STOPONERROR 0x0 /* Not used any more */ #else -# define TCL_ENCODING_STRICT 0x44 # define TCL_ENCODING_STOPONERROR 0x04 #endif #define TCL_ENCODING_NO_TERMINATE 0x08 @@ -1967,8 +1963,12 @@ typedef struct Tcl_EncodingType { #define TCL_ENCODING_INTERNAL_USE_MASK 0xFF00 /* Reserve top byte for profile values (disjoint, not a mask) */ #define TCL_ENCODING_PROFILE_TCL8 0x01000000 -#define TCL_ENCODING_PROFILE_STRICT 0x02000000 -#define TCL_ENCODING_PROFILE_REPLACE 0x03000000 +#if TCL_MAJOR_VERSION > 8 +# define TCL_ENCODING_PROFILE_STRICT 0x00000000 +#else +# define TCL_ENCODING_PROFILE_STRICT 0x03000000 +#endif +#define TCL_ENCODING_PROFILE_REPLACE 0x02000000 #define TCL_ENCODING_PROFILE_MASK 0xFF000000 #define TCL_ENCODING_PROFILE_GET(flags_) ((flags_) & TCL_ENCODING_PROFILE_MASK) #define TCL_ENCODING_PROFILE_SET(flags_, profile_) \ @@ -1976,12 +1976,6 @@ typedef struct Tcl_EncodingType { (flags_) &= ~TCL_ENCODING_PROFILE_MASK; \ (flags_) |= profile_; \ } while (0) -/* Still being argued - For Tcl9, is the default strict? TODO */ -#if TCL_MAJOR_VERSION < 9 -#define TCL_ENCODING_PROFILE_DEFAULT TCL_ENCODING_PROFILE_TCL8 -#else -#define TCL_ENCODING_PROFILE_DEFAULT TCL_ENCODING_PROFILE_TCL8 /* STRICT? REPLACE? TODO */ -#endif /* * The following definitions are the error codes returned by the conversion @@ -2002,13 +1996,10 @@ typedef struct Tcl_EncodingType { * TCL_CONVERT_SYNTAX - The source stream contained an invalid * character sequence. This may occur if the * input stream has been damaged or if the input - * encoding method was misidentified. This error - * is reported unless if TCL_ENCODING_NOCOMPLAIN - * was specified. + * encoding method was misidentified. * TCL_CONVERT_UNKNOWN - The source string contained a character that * could not be represented in the target - * encoding. This error is reported unless if - * TCL_ENCODING_NOCOMPLAIN was specified. + * encoding. */ #define TCL_CONVERT_MULTIBYTE (-1) diff --git a/generic/tclCmdAH.c b/generic/tclCmdAH.c index 7fab2f0..f90018e 100644 --- a/generic/tclCmdAH.c +++ b/generic/tclCmdAH.c @@ -435,7 +435,7 @@ EncodingConvertParseOptions ( Tcl_Obj *dataObj; Tcl_Obj *failVarObj; #if TCL_MAJOR_VERSION > 8 || defined(TCL_NO_DEPRECATED) - int profile = TCL_ENCODING_PROFILE_TCL8; /* TODO - default for Tcl9? */ + int profile = TCL_ENCODING_PROFILE_STRICT; #else int profile = TCL_ENCODING_PROFILE_TCL8; #endif diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 3842f2f..267a667 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -200,14 +200,10 @@ static struct TclEncodingProfiles { {"tcl8", TCL_ENCODING_PROFILE_TCL8}, }; #define PROFILE_STRICT(flags_) \ - ((TCL_ENCODING_PROFILE_GET(flags_) == TCL_ENCODING_PROFILE_STRICT) \ - || (TCL_ENCODING_PROFILE_GET(flags_) == 0 \ - && TCL_ENCODING_PROFILE_DEFAULT == TCL_ENCODING_PROFILE_STRICT)) + (TCL_ENCODING_PROFILE_GET(flags_) == TCL_ENCODING_PROFILE_STRICT) #define PROFILE_REPLACE(flags_) \ - ((TCL_ENCODING_PROFILE_GET(flags_) == TCL_ENCODING_PROFILE_REPLACE) \ - || (TCL_ENCODING_PROFILE_GET(flags_) == 0 \ - && TCL_ENCODING_PROFILE_DEFAULT == TCL_ENCODING_PROFILE_REPLACE)) + (TCL_ENCODING_PROFILE_GET(flags_) == TCL_ENCODING_PROFILE_REPLACE) #define UNICODE_REPLACE_CHAR ((Tcl_UniChar)0xFFFD) #define SURROGATE(c_) (((c_) & ~0x7FF) == 0xD800) @@ -1168,10 +1164,6 @@ Tcl_ExternalToUtfDString( * Possible flags values: * target encoding. It should be composed by OR-ing the following: * - *At most one* of TCL_ENCODING_PROFILE{DEFAULT,TCL8,STRICT} - * - TCL_ENCODING_STOPONERROR: Backward compatibility. Sets the profile - * to TCL_ENCODING_PROFILE_STRICT overriding any specified profile flags - * Any other flag bits will cause an error to be returned (for future - * compatibility) * * Results: * The return value is one of @@ -1475,7 +1467,7 @@ Tcl_UtfToExternalDString( * converted string is stored. */ { Tcl_UtfToExternalDStringEx( - NULL, encoding, src, srcLen, TCL_ENCODING_PROFILE_DEFAULT, dstPtr, NULL); + NULL, encoding, src, srcLen, TCL_ENCODING_PROFILE_TCL8, dstPtr, NULL); return Tcl_DStringValue(dstPtr); } @@ -1490,8 +1482,6 @@ Tcl_UtfToExternalDString( * the source buffer are invalid or cannot be represented in the * target encoding. It should be composed by OR-ing the following: * - *At most one* of TCL_ENCODING_PROFILE{DEFAULT,TCL8,STRICT} - * - TCL_ENCODING_STOPONERROR: Backward compatibility. Sets the profile - * to TCL_ENCODING_PROFILE_STRICT overriding any specified profile flags * * Results: * The return value is one of @@ -2432,7 +2422,6 @@ BinaryProc( if (dstLen < 0) { dstLen = 0; } - flags = TclEncodingSetProfileFlags(flags); if ((flags & TCL_ENCODING_CHAR_LIMIT) && srcLen > *dstCharsPtr) { srcLen = *dstCharsPtr; } @@ -2500,7 +2489,6 @@ UtfToUtfProc( srcStart = src; srcEnd = src + srcLen; srcClose = srcEnd; - flags = TclEncodingSetProfileFlags(flags); if ((flags & TCL_ENCODING_END) == 0) { srcClose -= 6; } @@ -2726,7 +2714,6 @@ Utf32ToUtfProc( int result, numChars, charLimit = INT_MAX; int ch, bytesLeft = srcLen % 4; - flags = TclEncodingSetProfileFlags(flags); flags |= PTR2INT(clientData); if (flags & TCL_ENCODING_CHAR_LIMIT) { charLimit = *dstCharsPtr; @@ -2857,7 +2844,6 @@ UtfToUtf32Proc( srcStart = src; srcEnd = src + srcLen; srcClose = srcEnd; - flags = TclEncodingSetProfileFlags(flags); if ((flags & TCL_ENCODING_END) == 0) { srcClose -= TCL_UTF_MAX; } @@ -2955,7 +2941,6 @@ Utf16ToUtfProc( int result, numChars, charLimit = INT_MAX; unsigned short ch = 0; - flags = TclEncodingSetProfileFlags(flags); flags |= PTR2INT(clientData); if (flags & TCL_ENCODING_CHAR_LIMIT) { charLimit = *dstCharsPtr; @@ -3095,7 +3080,6 @@ UtfToUtf16Proc( srcStart = src; srcEnd = src + srcLen; srcClose = srcEnd; - flags = TclEncodingSetProfileFlags(flags); if ((flags & TCL_ENCODING_END) == 0) { srcClose -= TCL_UTF_MAX; } @@ -3201,7 +3185,6 @@ UtfToUcs2Proc( int result, numChars, len; Tcl_UniChar ch = 0; - flags = TclEncodingSetProfileFlags(flags); flags |= PTR2INT(clientData); srcStart = src; srcEnd = src + srcLen; @@ -3324,7 +3307,6 @@ TableToUtfProc( const unsigned short *pageZero; TableEncodingData *dataPtr = (TableEncodingData *)clientData; - flags = TclEncodingSetProfileFlags(flags); if (flags & TCL_ENCODING_CHAR_LIMIT) { charLimit = *dstCharsPtr; } @@ -3453,7 +3435,6 @@ TableFromUtfProc( srcStart = src; srcEnd = src + srcLen; srcClose = srcEnd; - flags = TclEncodingSetProfileFlags(flags); if ((flags & TCL_ENCODING_END) == 0) { srcClose -= TCL_UTF_MAX; } @@ -3560,7 +3541,6 @@ Iso88591ToUtfProc( const char *dstEnd, *dstStart; int result, numChars, charLimit = INT_MAX; - flags = TclEncodingSetProfileFlags(flags); if (flags & TCL_ENCODING_CHAR_LIMIT) { charLimit = *dstCharsPtr; } @@ -3645,7 +3625,6 @@ Iso88591FromUtfProc( srcStart = src; srcEnd = src + srcLen; srcClose = srcEnd; - flags = TclEncodingSetProfileFlags(flags); if ((flags & TCL_ENCODING_END) == 0) { srcClose -= TCL_UTF_MAX; } @@ -3793,7 +3772,6 @@ EscapeToUtfProc( int state, result, numChars, charLimit = INT_MAX; const char *dstStart, *dstEnd; - flags = TclEncodingSetProfileFlags(flags); if (flags & TCL_ENCODING_CHAR_LIMIT) { charLimit = *dstCharsPtr; } @@ -4017,7 +3995,6 @@ EscapeFromUtfProc( srcStart = src; srcEnd = src + srcLen; srcClose = srcEnd; - flags = TclEncodingSetProfileFlags(flags); if ((flags & TCL_ENCODING_END) == 0) { srcClose -= TCL_UTF_MAX; } @@ -4466,49 +4443,6 @@ TclEncodingProfileIdToName( /* *------------------------------------------------------------------------ * - * TclEncodingSetProfileFlags -- - * - * Maps the flags supported in the encoding C API's to internal flags. - * - * For backward compatibility reasons, TCL_ENCODING_STOPONERROR is - * is mapped to the TCL_ENCODING_PROFILE_STRICT overwriting any profile - * specified. - * - * If no profile or an invalid profile is specified, it is set to - * the default. - * - * Results: - * Internal encoding flag mask. - * - * Side effects: - * None. - * - *------------------------------------------------------------------------ - */ -int TclEncodingSetProfileFlags(int flags) -{ - if (flags & TCL_ENCODING_STOPONERROR) { - TCL_ENCODING_PROFILE_SET(flags, TCL_ENCODING_PROFILE_STRICT); - } - else { - int profile = TCL_ENCODING_PROFILE_GET(flags); - switch (profile) { - case TCL_ENCODING_PROFILE_TCL8: - case TCL_ENCODING_PROFILE_STRICT: - case TCL_ENCODING_PROFILE_REPLACE: - break; - case 0: /* Unspecified by caller */ - default: - TCL_ENCODING_PROFILE_SET(flags, TCL_ENCODING_PROFILE_DEFAULT); - break; - } - } - return flags; -} - -/* - *------------------------------------------------------------------------ - * * TclGetEncodingProfiles -- * * Get the list of supported encoding profiles. diff --git a/generic/tclIO.c b/generic/tclIO.c index dd05ee3..3ee2dff 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -1675,12 +1675,10 @@ Tcl_CreateChannel( } statePtr->inputEncodingState = NULL; statePtr->inputEncodingFlags = TCL_ENCODING_START; - TCL_ENCODING_PROFILE_SET(statePtr->inputEncodingFlags, - TCL_ENCODING_PROFILE_DEFAULT); + TCL_ENCODING_PROFILE_SET(statePtr->inputEncodingFlags, 0); statePtr->outputEncodingState = NULL; statePtr->outputEncodingFlags = TCL_ENCODING_START; - TCL_ENCODING_PROFILE_SET(statePtr->outputEncodingFlags, - TCL_ENCODING_PROFILE_DEFAULT); + TCL_ENCODING_PROFILE_SET(statePtr->outputEncodingFlags, 0); /* * Set the channel up initially in AUTO input translation mode to accept @@ -7499,8 +7497,7 @@ Tcl_Eof( ChannelState *statePtr = ((Channel *) chan)->state; /* State of real channel structure. */ - if (GotFlag(statePtr, CHANNEL_NONBLOCKING|CHANNEL_FCOPY) - && GotFlag(statePtr, CHANNEL_ENCODING_ERROR)) { + if (GotFlag(statePtr, CHANNEL_ENCODING_ERROR)) { return 0; } return GotFlag(statePtr, CHANNEL_EOF) ? 1 : 0; @@ -9631,7 +9628,6 @@ CopyData( * the bottom of the stack. */ - SetFlag(inStatePtr, CHANNEL_FCOPY); inBinary = (inStatePtr->encoding == NULL); outBinary = (outStatePtr->encoding == NULL); sameEncoding = inStatePtr->encoding == outStatePtr->encoding @@ -9747,7 +9743,6 @@ CopyData( TclDecrRefCount(bufObj); bufObj = NULL; } - ResetFlag(inStatePtr, CHANNEL_FCOPY); return TCL_OK; } } @@ -9839,7 +9834,6 @@ CopyData( TclDecrRefCount(bufObj); bufObj = NULL; } - ResetFlag(inStatePtr, CHANNEL_FCOPY); return TCL_OK; } @@ -9862,7 +9856,6 @@ CopyData( TclDecrRefCount(bufObj); bufObj = NULL; } - ResetFlag(inStatePtr, CHANNEL_FCOPY); return TCL_OK; } } /* while */ @@ -9915,7 +9908,6 @@ CopyData( } } } - ResetFlag(inStatePtr, CHANNEL_FCOPY); return result; } diff --git a/generic/tclIO.h b/generic/tclIO.h index 8f0ef8a..a050010 100644 --- a/generic/tclIO.h +++ b/generic/tclIO.h @@ -235,8 +235,6 @@ typedef struct ChannelState { * flushed after every newline. */ #define CHANNEL_UNBUFFERED (1<<5) /* Output to the channel must always * be flushed immediately. */ -#define CHANNEL_FCOPY (1<<6) /* Channel is currently doing an fcopy - * mode. */ #define BG_FLUSH_SCHEDULED (1<<7) /* A background flush of the queued * output buffers has been * scheduled. */ diff --git a/generic/tclInt.h b/generic/tclInt.h index a90ac79..289c902 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -2886,7 +2886,6 @@ TclEncodingProfileNameToId(Tcl_Interp *interp, int *profilePtr); MODULE_SCOPE const char *TclEncodingProfileIdToName(Tcl_Interp *interp, int profileId); -MODULE_SCOPE int TclEncodingSetProfileFlags(int flags); MODULE_SCOPE void TclGetEncodingProfiles(Tcl_Interp *interp); /* diff --git a/tests/encoding.test b/tests/encoding.test index 8044c8c..bc330ae 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -465,7 +465,7 @@ test encoding-15.25 {UtfToUtfProc CESU-8} { encoding convertfrom cesu-8 \x00 } \x00 test encoding-15.26 {UtfToUtfProc CESU-8} { - encoding convertfrom cesu-8 \xC0\x80 + encoding convertfrom -profile tcl8 cesu-8 \xC0\x80 } \x00 test encoding-15.27 {UtfToUtfProc -profile strict CESU-8} { encoding convertfrom -profile strict cesu-8 \x00 @@ -511,21 +511,21 @@ test encoding-16.7 {Utf32ToUtfProc} -body { set val [encoding convertfrom utf-32be \0\0NN] list $val [format %x [scan $val %c]] } -result "乎 4e4e" -test encoding-16.8 {Utf32ToUtfProc} -body { +test encoding-16.8 {Utf32ToUtfProc} -constraints knownBug -body { set val [encoding convertfrom -profile tcl8 utf-32 \x41\x00\x00\x41] list $val [format %x [scan $val %c]] } -result "\uFFFD fffd" test encoding-16.9 {Utf32ToUtfProc} -constraints utf32 -body { - encoding convertfrom utf-32le \x00\xD8\x00\x00 + encoding convertfrom -profile tcl8 utf-32le \x00\xD8\x00\x00 } -result \uD800 test encoding-16.10 {Utf32ToUtfProc} -body { - encoding convertfrom utf-32le \x00\xDC\x00\x00 + encoding convertfrom -profile tcl8 utf-32le \x00\xDC\x00\x00 } -result \uDC00 test encoding-16.11 {Utf32ToUtfProc} -body { - encoding convertfrom utf-32le \x00\xD8\x00\x00\x00\xDC\x00\x00 + encoding convertfrom -profile tcl8 utf-32le \x00\xD8\x00\x00\x00\xDC\x00\x00 } -result \uD800\uDC00 test encoding-16.12 {Utf32ToUtfProc} -constraints utf32 -body { - encoding convertfrom utf-32le \x00\xDC\x00\x00\x00\xD8\x00\x00 + encoding convertfrom -profile tcl8 utf-32le \x00\xDC\x00\x00\x00\xD8\x00\x00 } -result \uDC00\uD800 test encoding-16.13 {Utf16ToUtfProc} -body { encoding convertfrom utf-16le \x00\xD8 @@ -563,13 +563,13 @@ test encoding-16.18 { } [namespace current]] } -result done test encoding-16.19 {UnicodeToUtfProc, bug [d19fe0a5b]} -body { - encoding convertfrom utf-16 "\x41\x41\x41" + encoding convertfrom -profile tcl8 utf-16 "\x41\x41\x41" } -result \u4141\uFFFD -test encoding-16.20 {UnicodeToUtfProc, bug [d19fe0a5b]} -constraints deprecated -body { - encoding convertfrom utf-16 "\xD8\xD8" +test encoding-16.20 {UnicodeToUtfProc, bug [d19fe0a5b]} -body { + encoding convertfrom -profile tcl8 utf-16 "\xD8\xD8" } -result \uD8D8 test encoding-16.21 {UnicodeToUtfProc, bug [d19fe0a5b]} -body { - encoding convertfrom utf-32 "\x00\x00\x00\x00\x41\x41" + encoding convertfrom -profile tcl8 utf-32 "\x00\x00\x00\x00\x41\x41" } -result \x00\uFFFD test encoding-17.1 {UtfToUtf16Proc} -body { @@ -616,14 +616,14 @@ test encoding-18.4 {TableToUtfProc on invalid input with -failindex -profile str list [catch {encoding convertto -failindex pos -profile strict jis0208 \\} res] $res $pos } -result {0 {} 0} test encoding-18.5 {TableToUtfProc on invalid input with -failindex} -body { - list [catch {encoding convertto -failindex pos jis0208 \\} res] $res $pos + list [catch {encoding convertto -profile tcl8 -failindex pos jis0208 \\} res] $res $pos } -result {0 !) -1} test encoding-18.6 {TableToUtfProc on invalid input with -profile tcl8} -body { list [catch {encoding convertto -profile tcl8 jis0208 \\} res] $res } -result {0 !)} test encoding-19.1 {TableFromUtfProc} -body { - encoding convertfrom ascii AÁ + encoding convertfrom -profile tcl8 ascii AÁ } -result AÁ test encoding-19.2 {TableFromUtfProc} -body { encoding convertfrom -profile tcl8 ascii AÁ @@ -632,7 +632,7 @@ test encoding-19.3 {TableFromUtfProc} -body { encoding convertfrom -profile strict ascii AÁ } -returnCodes 1 -result {unexpected byte sequence starting at index 1: '\xC1'} test encoding-19.4 {TableFromUtfProc} -body { - list [encoding convertfrom -failindex idx ascii AÁ] [set idx] + list [encoding convertfrom -profile tcl8 -failindex idx ascii AÁ] [set idx] } -result [list A\xC1 -1] test encoding-19.5 {TableFromUtfProc} -body { list [encoding convertfrom -failindex idx -profile strict ascii A\xC1] [set idx] @@ -748,7 +748,7 @@ test encoding-24.3 {EscapeFreeProc on open channels} {stdio} { } [list 3 "乎乞也 (\\u4E4E\\u4E5E\\u4E5F)"] test encoding-24.4 {Parse valid or invalid utf-8} { - string length [encoding convertfrom utf-8 "\xC0\x80"] + string length [encoding convertfrom -profile tcl8 utf-8 "\xC0\x80"] } 1 test encoding-24.5 {Parse valid or invalid utf-8} { string length [encoding convertfrom -profile tcl8 utf-8 "\xC0\x81"] @@ -781,7 +781,7 @@ test encoding-24.14 {Parse valid or invalid utf-8} { string length [encoding convertfrom utf-8 "\xC2\x80"] } 1 test encoding-24.15 {Parse valid or invalid utf-8} -body { - encoding convertfrom utf-8 "Z\xE0\x80" + encoding convertfrom -profile tcl8 utf-8 "Z\xE0\x80" } -result Z\xE0\u20AC test encoding-24.16 {Parse valid or invalid utf-8} -constraints testbytestring -body { encoding convertto utf-8 [testbytestring "Z\u4343\x80"] @@ -841,7 +841,7 @@ test encoding-24.34 {Try to generate invalid utf-8 with -profile tcl8} -body { encoding convertto -profile tcl8 utf-8 \uFFFF } -result \xEF\xBF\xBF test encoding-24.35 {Parse invalid utf-8} -constraints utf32 -body { - encoding convertfrom utf-8 \xED\xA0\x80 + encoding convertfrom -profile tcl8 utf-8 \xED\xA0\x80 } -result \uD800 test encoding-24.36 {Parse invalid utf-8 with -profile strict} -body { encoding convertfrom -profile strict utf-8 \xED\xA0\x80 diff --git a/tests/encodingVectors.tcl b/tests/encodingVectors.tcl index b3f3efa..725f4ae 100644 --- a/tests/encodingVectors.tcl +++ b/tests/encodingVectors.tcl @@ -10,7 +10,7 @@ # List of defined encoding profiles set encProfiles {tcl8 strict replace} -set encDefaultProfile tcl8; # Should reflect the default from implementation +set encDefaultProfile strict; # Should reflect the default from implementation # encValidStrings - Table of valid strings. # -- cgit v0.12 From 0eaea8713d066effbb0b2a5062db37be59b615af Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Tue, 7 Mar 2023 23:11:17 +0000 Subject: encodingprofile -> profile, and fix more testcases --- generic/tclIO.c | 8 ++++---- tests/chanio.test | 6 +++--- tests/encoding.test | 10 +++++----- tests/io.test | 52 +++++++++++++++++++++++++-------------------------- tests/ioCmd.test | 20 ++++++++++---------- tests/winConsole.test | 14 +++++++------- tests/zlib.test | 4 ++-- 7 files changed, 57 insertions(+), 57 deletions(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index 3ee2dff..9528896 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -7774,7 +7774,7 @@ Tcl_BadChannelOption( { if (interp != NULL) { const char *genericopt = - "blocking buffering buffersize encoding encodingprofile eofchar translation"; + "blocking buffering buffersize encoding eofchar profile translation"; const char **argv; Tcl_Size argc, i; Tcl_DString ds; @@ -7929,11 +7929,11 @@ Tcl_GetChannelOption( return TCL_OK; } } - if (len == 0 || HaveOpt(9, "-encodingprofile")) { + if (len == 0 || HaveOpt(1, "-profile")) { int profile; const char *profileName; if (len == 0) { - Tcl_DStringAppendElement(dsPtr, "-encodingprofile"); + Tcl_DStringAppendElement(dsPtr, "-profile"); } /* Note currently input and output profiles are same */ profile = TCL_ENCODING_PROFILE_GET(statePtr->inputEncodingFlags); @@ -8209,7 +8209,7 @@ Tcl_SetChannelOption( ResetFlag(statePtr, CHANNEL_EOF|CHANNEL_STICKY_EOF|CHANNEL_BLOCKED); statePtr->inputEncodingFlags &= ~TCL_ENCODING_END; return TCL_OK; - } else if (HaveOpt(1, "-encodingprofile")) { + } else if (HaveOpt(1, "-profile")) { int profile; if (TclEncodingProfileNameToId(interp, newValue, &profile) != TCL_OK) { return TCL_ERROR; diff --git a/tests/chanio.test b/tests/chanio.test index dadb997..95cde7f 100644 --- a/tests/chanio.test +++ b/tests/chanio.test @@ -254,7 +254,7 @@ test chan-io-3.3 {WriteChars: compatibility with WriteBytes: flush on line} -bod test chan-io-3.4 {WriteChars: loop over stage buffer} -body { # stage buffer maps to more than can be queued at once. set f [open $path(test1) w] - chan configure $f -encoding jis0208 -buffersize 16 -encodingprofile tcl8 + chan configure $f -encoding jis0208 -buffersize 16 -profile tcl8 chan puts -nonewline $f "\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\" set x [list [contents $path(test1)]] chan close $f @@ -267,7 +267,7 @@ test chan-io-3.5 {WriteChars: saved != 0} -body { # be moved to beginning of next channel buffer to preserve requested # buffersize. set f [open $path(test1) w] - chan configure $f -encoding jis0208 -buffersize 17 -encodingprofile tcl8 + chan configure $f -encoding jis0208 -buffersize 17 -profile tcl8 chan puts -nonewline $f "\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\" set x [list [contents $path(test1)]] chan close $f @@ -300,7 +300,7 @@ test chan-io-3.7 {WriteChars: (bufPtr->nextAdded > bufPtr->length)} -body { # on flush. The truncated bytes are moved to the beginning of the next # channel buffer. set f [open $path(test1) w] - chan configure $f -encoding jis0208 -buffersize 17 -encodingprofile tcl8 + chan configure $f -encoding jis0208 -buffersize 17 -profile tcl8 chan puts -nonewline $f "\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\" set x [list [contents $path(test1)]] chan close $f diff --git a/tests/encoding.test b/tests/encoding.test index bc330ae..0497846 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -106,13 +106,13 @@ test encoding-3.2 {Tcl_GetEncodingName, non-null} -setup { } -cleanup { fconfigure stdout -encoding $old } -result {jis0208} -test encoding-3.3 {fconfigure -encodingprofile} -setup { - set old [fconfigure stdout -encodingprofile] +test encoding-3.3 {fconfigure -profile} -setup { + set old [fconfigure stdout -profile] } -body { - fconfigure stdout -encodingprofile replace - fconfigure stdout -encodingprofile + fconfigure stdout -profile replace + fconfigure stdout -profile } -cleanup { - fconfigure stdout -encodingprofile $old + fconfigure stdout -profile $old } -result replace test encoding-4.1 {Tcl_GetEncodingNames} -constraints {testencoding} -setup { diff --git a/tests/io.test b/tests/io.test index 6251a4c..2a18482 100644 --- a/tests/io.test +++ b/tests/io.test @@ -339,7 +339,7 @@ test io-3.4 {WriteChars: loop over stage buffer} -body { # stage buffer maps to more than can be queued at once. set f [open $path(test1) w] - fconfigure $f -encoding jis0208 -buffersize 16 -encodingprofile tcl8 + fconfigure $f -encoding jis0208 -buffersize 16 -profile tcl8 puts -nonewline $f "\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\" set x [list [contents $path(test1)]] close $f @@ -353,7 +353,7 @@ test io-3.5 {WriteChars: saved != 0} -body { # requested buffersize. set f [open $path(test1) w] - fconfigure $f -encoding jis0208 -buffersize 17 -encodingprofile tcl8 + fconfigure $f -encoding jis0208 -buffersize 17 -profile tcl8 puts -nonewline $f "\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\" set x [list [contents $path(test1)]] close $f @@ -386,7 +386,7 @@ test io-3.7 {WriteChars: (bufPtr->nextAdded > bufPtr->length)} -body { # of the next channel buffer. set f [open $path(test1) w] - fconfigure $f -encoding jis0208 -buffersize 17 -encodingprofile tcl8 + fconfigure $f -encoding jis0208 -buffersize 17 -profile tcl8 puts -nonewline $f "\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\" set x [list [contents $path(test1)]] close $f @@ -1620,7 +1620,7 @@ test io-12.9 {ReadChars: multibyte chars split} -body { puts -nonewline $f [string repeat a 9]\xC2 close $f set f [open $path(test1)] - fconfigure $f -encoding utf-8 -buffersize 10 + fconfigure $f -encoding utf-8 -profile tcl8 -buffersize 10 set in [read $f] close $f scan [string index $in end] %c @@ -1633,7 +1633,7 @@ test io-12.10 {ReadChars: multibyte chars split} -body { puts -nonewline $f [string repeat a 9]\xC2 close $f set f [open $path(test1)] - fconfigure $f -encoding utf-8 -buffersize 11 + fconfigure $f -encoding utf-8 -profile tcl8 -buffersize 11 set in [read $f] close $f scan [string index $in end] %c @@ -7689,7 +7689,7 @@ test io-52.20 {TclCopyChannel & encodings} -setup { set out [open $path(kyrillic.txt) w] # Using "-encoding ascii" means reading the "Á" gives an error - fconfigure $in -encoding ascii -encodingprofile strict + fconfigure $in -encoding ascii -profile strict fconfigure $out -encoding koi8-r -translation lf fcopy $in $out @@ -7711,7 +7711,7 @@ test io-52.21 {TclCopyChannel & encodings} -setup { # Using "-encoding ascii" means writing the "Á" gives an error fconfigure $in -encoding utf-8 - fconfigure $out -encoding ascii -translation lf -encodingprofile strict + fconfigure $out -encoding ascii -translation lf -profile strict fcopy $in $out } -cleanup { @@ -7731,7 +7731,7 @@ test io-52.22 {TclCopyChannel & encodings} -setup { set out [open $path(kyrillic.txt) w] # Using "-encoding ascii" means reading the "Á" gives an error - fconfigure $in -encoding ascii -encodingprofile strict + fconfigure $in -encoding ascii -profile strict fconfigure $out -encoding koi8-r -translation lf proc ::xxx args { set ::s0 $args @@ -7759,7 +7759,7 @@ test io-52.23 {TclCopyChannel & encodings} -setup { # Using "-encoding ascii" means writing the "Á" gives an error fconfigure $in -encoding utf-8 - fconfigure $out -encoding ascii -translation lf -encodingprofile strict + fconfigure $out -encoding ascii -translation lf -profile strict proc ::xxx args { set ::s0 $args } @@ -9125,7 +9125,7 @@ test io-75.1 {multibyte encoding error read results in raw bytes (-nocomplainenc puts -nonewline $f A\xC0\x40 flush $f seek $f 0 - fconfigure $f -encoding utf-8 -encodingprofile tcl8 -buffering none + fconfigure $f -encoding utf-8 -profile tcl8 -buffering none } -body { set d [read $f] binary scan $d H* hd @@ -9135,10 +9135,10 @@ test io-75.1 {multibyte encoding error read results in raw bytes (-nocomplainenc removeFile io-75.1 } -result 41c040 -test io-75.2 {unrepresentable character write passes and is replaced by ? (-encodingprofile tcl8)} -setup { +test io-75.2 {unrepresentable character write passes and is replaced by ? (-profile tcl8)} -setup { set fn [makeFile {} io-75.2] set f [open $fn w+] - fconfigure $f -encoding iso8859-1 -encodingprofile tcl8 + fconfigure $f -encoding iso8859-1 -profile tcl8 } -body { puts -nonewline $f A\u2022 flush $f @@ -9152,14 +9152,14 @@ test io-75.2 {unrepresentable character write passes and is replaced by ? (-enco # Incomplete sequence test. # This error may IMHO only be detected with the close. # But the read already returns the incomplete sequence. -test io-75.3 {incomplete multibyte encoding read is ignored (-encodingprofile tcl8)} -setup { +test io-75.3 {incomplete multibyte encoding read is ignored (-profile tcl8)} -setup { set fn [makeFile {} io-75.3] set f [open $fn w+] fconfigure $f -encoding binary puts -nonewline $f "A\xC0" flush $f seek $f 0 - fconfigure $f -encoding utf-8 -buffering none -encodingprofile tcl8 + fconfigure $f -encoding utf-8 -buffering none -profile tcl8 } -body { set d [read $f] close $f @@ -9171,7 +9171,7 @@ test io-75.3 {incomplete multibyte encoding read is ignored (-encodingprofile tc # As utf-8 has a special treatment in multi-byte decoding, also test another # one. -test io-75.4 {shiftjis encoding error read results in raw bytes (-encodingprofile tcl8)} -setup { +test io-75.4 {shiftjis encoding error read results in raw bytes (-profile tcl8)} -setup { set fn [makeFile {} io-75.4] set f [open $fn w+] fconfigure $f -encoding binary @@ -9180,7 +9180,7 @@ test io-75.4 {shiftjis encoding error read results in raw bytes (-encodingprofil puts -nonewline $f A\x81\xFFA flush $f seek $f 0 - fconfigure $f -encoding shiftjis -buffering none -eofchar "" -translation lf -encodingprofile tcl8 + fconfigure $f -encoding shiftjis -buffering none -eofchar "" -translation lf -profile tcl8 } -body { set d [read $f] binary scan $d H* hd @@ -9190,14 +9190,14 @@ test io-75.4 {shiftjis encoding error read results in raw bytes (-encodingprofil removeFile io-75.4 } -result 4181ff41 -test io-75.5 {invalid utf-8 encoding read is ignored (-encodingprofile tcl8)} -setup { +test io-75.5 {invalid utf-8 encoding read is ignored (-profile tcl8)} -setup { set fn [makeFile {} io-75.5] set f [open $fn w+] fconfigure $f -encoding binary puts -nonewline $f A\x81 flush $f seek $f 0 - fconfigure $f -encoding utf-8 -buffering none -eofchar "" -translation lf -encodingprofile tcl8 + fconfigure $f -encoding utf-8 -buffering none -eofchar "" -translation lf -profile tcl8 } -body { set d [read $f] close $f @@ -9207,7 +9207,7 @@ test io-75.5 {invalid utf-8 encoding read is ignored (-encodingprofile tcl8)} -s removeFile io-75.5 } -result 4181 -test io-75.8 {invalid utf-8 encoding eof handling (-encodingprofile strict)} -setup { +test io-75.8 {invalid utf-8 encoding eof handling (-profile strict)} -setup { set fn [makeFile {} io-75.8] set f [open $fn w+] fconfigure $f -encoding binary @@ -9215,7 +9215,7 @@ test io-75.8 {invalid utf-8 encoding eof handling (-encodingprofile strict)} -se puts -nonewline $f A\x1A\x81 flush $f seek $f 0 - fconfigure $f -encoding utf-8 -buffering none -eofchar \x1A -translation lf -encodingprofile strict + fconfigure $f -encoding utf-8 -buffering none -eofchar \x1A -translation lf -profile strict } -body { set d [read $f] binary scan $d H* hd @@ -9230,7 +9230,7 @@ test io-75.8 {invalid utf-8 encoding eof handling (-encodingprofile strict)} -se test io-75.9 {unrepresentable character write passes and is replaced by ?} -setup { set fn [makeFile {} io-75.9] set f [open $fn w+] - fconfigure $f -encoding iso8859-1 -encodingprofile strict + fconfigure $f -encoding iso8859-1 -profile strict } -body { catch {puts -nonewline $f "A\u2022"} msg flush $f @@ -9251,7 +9251,7 @@ test io-75.10 {incomplete multibyte encoding read is ignored} -setup { puts -nonewline $f A\xC0 flush $f seek $f 0 - fconfigure $f -encoding utf-8 -buffering none + fconfigure $f -encoding utf-8 -profile tcl8 -buffering none } -body { set d [read $f] close $f @@ -9274,7 +9274,7 @@ test io-75.11 {shiftjis encoding error read results in raw bytes} -setup { puts -nonewline $f A\x81\xFFA flush $f seek $f 0 - fconfigure $f -encoding shiftjis -buffering none -eofchar "" -translation lf -encodingprofile strict + fconfigure $f -encoding shiftjis -buffering none -eofchar "" -translation lf -profile strict } -body { set d [read $f] binary scan $d H* hd @@ -9292,7 +9292,7 @@ test io-75.12 {invalid utf-8 encoding read is ignored} -setup { puts -nonewline $f A\x81 flush $f seek $f 0 - fconfigure $f -encoding utf-8 -buffering none -eofchar "" -translation lf + fconfigure $f -encoding utf-8 -profile tcl8 -buffering none -eofchar "" -translation lf } -body { set d [read $f] close $f @@ -9301,7 +9301,7 @@ test io-75.12 {invalid utf-8 encoding read is ignored} -setup { } -cleanup { removeFile io-75.12 } -result 4181 -test io-75.13 {invalid utf-8 encoding read is not ignored (-encodingprofile strict)} -setup { +test io-75.13 {invalid utf-8 encoding read is not ignored (-profile strict)} -setup { set fn [makeFile {} io-75.13] set f [open $fn w+] fconfigure $f -encoding binary @@ -9309,7 +9309,7 @@ test io-75.13 {invalid utf-8 encoding read is not ignored (-encodingprofile stri puts -nonewline $f "A\x81" flush $f seek $f 0 - fconfigure $f -encoding utf-8 -buffering none -eofchar "" -translation lf -encodingprofile strict + fconfigure $f -encoding utf-8 -buffering none -eofchar "" -translation lf -profile strict } -body { set d [read $f] binary scan $d H* hd diff --git a/tests/ioCmd.test b/tests/ioCmd.test index a1ec571..8a68559 100644 --- a/tests/ioCmd.test +++ b/tests/ioCmd.test @@ -207,7 +207,7 @@ test iocmd-7.5 {close command} -setup { proc expectedOpts {got extra} { set basicOpts { - -blocking -buffering -buffersize -encoding -encodingprofile -eofchar -translation + -blocking -buffering -buffersize -encoding -eofchar -profile -translation } set opts [list {*}$basicOpts {*}$extra] lset opts end [string cat "or " [lindex $opts end]] @@ -244,19 +244,19 @@ test iocmd-8.7 {fconfigure command} -setup { fconfigure $f1 } -cleanup { catch {close $f1} -} -result {-blocking 1 -buffering full -buffersize 4096 -encoding utf-16 -encodingprofile tcl8 -eofchar {} -translation lf} +} -result {-blocking 1 -buffering full -buffersize 4096 -encoding utf-16 -profile strict -eofchar {} -translation lf} test iocmd-8.8 {fconfigure command} -setup { file delete $path(test1) set x {} } -body { set f1 [open $path(test1) w] fconfigure $f1 -translation lf -buffering line -buffersize 3030 \ - -eofchar {} -encoding utf-16 -encodingprofile tcl8 + -eofchar {} -encoding utf-16 -profile tcl8 lappend x [fconfigure $f1 -buffering] lappend x [fconfigure $f1] } -cleanup { catch {close $f1} -} -result {line {-blocking 1 -buffering line -buffersize 3030 -encoding utf-16 -encodingprofile tcl8 -eofchar {} -translation lf}} +} -result {line {-blocking 1 -buffering line -buffersize 3030 -encoding utf-16 -profile tcl8 -eofchar {} -translation lf}} test iocmd-8.9 {fconfigure command} -setup { file delete $path(test1) } -body { @@ -266,7 +266,7 @@ test iocmd-8.9 {fconfigure command} -setup { fconfigure $f1 } -cleanup { catch {close $f1} -} -result {-blocking 1 -buffering none -buffersize 4040 -encoding binary -encodingprofile tcl8 -eofchar {} -translation lf} +} -result {-blocking 1 -buffering none -buffersize 4040 -encoding binary -profile strict -eofchar {} -translation lf} test iocmd-8.10 {fconfigure command} -returnCodes error -body { fconfigure a b } -result {can not find channel named "a"} @@ -369,8 +369,8 @@ test iocmd-8.20 {fconfigure command / win console channel} -constraints {nonPort # TODO: Test parsing of serial channel options (nonPortable, since requires an # open channel to work with). -test iocmd-8.21 {fconfigure -encodingprofile badprofile} -body { - fconfigure stdin -encodingprofile froboz +test iocmd-8.21 {fconfigure -profile badprofile} -body { + fconfigure stdin -profile froboz } -returnCodes error -result {bad profile name "froboz": must be replace, strict, or tcl8} test iocmd-9.1 {eof command} { @@ -1372,7 +1372,7 @@ test iocmd-25.1 {chan configure, cgetall, standard options} -match glob -body { close $c rename foo {} set res -} -result {{-blocking 1 -buffering full -buffersize 4096 -encoding * -encodingprofile * -eofchar {} -translation {auto *}}} +} -result {{-blocking 1 -buffering full -buffersize 4096 -encoding * -profile * -eofchar {} -translation {auto *}}} test iocmd-25.2 {chan configure, cgetall, no options} -match glob -body { set res {} proc foo {args} {oninit cget cgetall; onfinal; track; return ""} @@ -1381,7 +1381,7 @@ test iocmd-25.2 {chan configure, cgetall, no options} -match glob -body { close $c rename foo {} set res -} -result {{cgetall rc*} {-blocking 1 -buffering full -buffersize 4096 -encoding * -encodingprofile * -eofchar {} -translation {auto *}}} +} -result {{cgetall rc*} {-blocking 1 -buffering full -buffersize 4096 -encoding * -profile * -eofchar {} -translation {auto *}}} test iocmd-25.3 {chan configure, cgetall, regular result} -match glob -body { set res {} proc foo {args} { @@ -1393,7 +1393,7 @@ test iocmd-25.3 {chan configure, cgetall, regular result} -match glob -body { close $c rename foo {} set res -} -result {{cgetall rc*} {-blocking 1 -buffering full -buffersize 4096 -encoding * -encodingprofile * -eofchar {} -translation {auto *} -bar foo -snarf x}} +} -result {{cgetall rc*} {-blocking 1 -buffering full -buffersize 4096 -encoding * -profile * -eofchar {} -translation {auto *} -bar foo -snarf x}} test iocmd-25.4 {chan configure, cgetall, bad result, list of uneven length} -match glob -body { set res {} proc foo {args} { diff --git a/tests/winConsole.test b/tests/winConsole.test index 62dfbf3..ede6e92 100644 --- a/tests/winConsole.test +++ b/tests/winConsole.test @@ -198,7 +198,7 @@ test console-fconfigure-get-1.0 { Console get stdin configuration } -constraints {win interactive} -body { lsort [dict keys [fconfigure stdin]] -} -result {-blocking -buffering -buffersize -encoding -encodingprofile -eofchar -inputmode -translation} +} -result {-blocking -buffering -buffersize -encoding -profile -eofchar -inputmode -translation} set testnum 0 foreach {opt result} { @@ -224,7 +224,7 @@ test console-fconfigure-get-1.[incr testnum] { fconfigure -winsize } -constraints {win interactive} -body { fconfigure stdin -winsize -} -result {bad option "-winsize": should be one of -blocking, -buffering, -buffersize, -encoding, -encodingprofile, -eofchar, -translation, or -inputmode} -returnCodes error +} -result {bad option "-winsize": should be one of -blocking, -buffering, -buffersize, -encoding, -eofchar, -profile, -translation, or -inputmode} -returnCodes error ## fconfigure get stdout/stderr foreach chan {stdout stderr} major {2 3} { @@ -232,7 +232,7 @@ foreach chan {stdout stderr} major {2 3} { win interactive } -body { lsort [dict keys [fconfigure $chan]] - } -result {-blocking -buffering -buffersize -encoding -encodingprofile -eofchar -translation -winsize} + } -result {-blocking -buffering -buffersize -encoding -profile -eofchar -translation -winsize} set testnum 0 foreach {opt result} { -blocking 1 @@ -260,7 +260,7 @@ foreach chan {stdout stderr} major {2 3} { fconfigure -inputmode } -constraints {win interactive} -body { fconfigure $chan -inputmode - } -result {bad option "-inputmode": should be one of -blocking, -buffering, -buffersize, -encoding, -encodingprofile, -eofchar, -translation, or -winsize} -returnCodes error + } -result {bad option "-inputmode": should be one of -blocking, -buffering, -buffersize, -encoding, -eofchar, -profile, -translation, or -winsize} -returnCodes error } @@ -330,7 +330,7 @@ test console-fconfigure-set-1.3 { fconfigure stdin -winsize } -constraints {win interactive} -body { fconfigure stdin -winsize {10 30} -} -result {bad option "-winsize": should be one of -blocking, -buffering, -buffersize, -encoding, -encodingprofile, -eofchar, -translation, or -inputmode} -returnCodes error +} -result {bad option "-winsize": should be one of -blocking, -buffering, -buffersize, -encoding, -eofchar, -profile, -translation, or -inputmode} -returnCodes error ## fconfigure set stdout,stderr @@ -338,13 +338,13 @@ test console-fconfigure-set-2.0 { fconfigure stdout -winsize } -constraints {win interactive} -body { fconfigure stdout -winsize {10 30} -} -result {bad option "-winsize": should be one of -blocking, -buffering, -buffersize, -encoding, -encodingprofile, -eofchar, or -translation} -returnCodes error +} -result {bad option "-winsize": should be one of -blocking, -buffering, -buffersize, -encoding, -eofchar, -profile, or -translation} -returnCodes error test console-fconfigure-set-3.0 { fconfigure stderr -winsize } -constraints {win interactive} -body { fconfigure stderr -winsize {10 30} -} -result {bad option "-winsize": should be one of -blocking, -buffering, -buffersize, -encoding, -encodingprofile, -eofchar, or -translation} -returnCodes error +} -result {bad option "-winsize": should be one of -blocking, -buffering, -buffersize, -encoding, -eofchar, -profile, or -translation} -returnCodes error # Multiple threads diff --git a/tests/zlib.test b/tests/zlib.test index ae7dd6d..b343c06 100644 --- a/tests/zlib.test +++ b/tests/zlib.test @@ -292,7 +292,7 @@ test zlib-8.6 {transformation and fconfigure} -setup { } -cleanup { catch {close $fd} removeFile $file -} -result {{-blocking 1 -buffering full -buffersize 4096 -encoding binary -encodingprofile tcl8 -eofchar {} -translation lf} {-blocking 1 -buffering full -buffersize 4096 -encoding binary -encodingprofile tcl8 -eofchar {} -translation lf -checksum 1 -dictionary {}} {-blocking 1 -buffering full -buffersize 4096 -encoding binary -encodingprofile tcl8 -eofchar {} -translation lf}} +} -result {{-blocking 1 -buffering full -buffersize 4096 -encoding binary -profile strict -eofchar {} -translation lf} {-blocking 1 -buffering full -buffersize 4096 -encoding binary -profile strict -eofchar {} -translation lf -checksum 1 -dictionary {}} {-blocking 1 -buffering full -buffersize 4096 -encoding binary -profile strict -eofchar {} -translation lf}} test zlib-8.7 {transformation and fconfigure} -setup { set file [makeFile {} test.gz] set fd [open $file wb] @@ -302,7 +302,7 @@ test zlib-8.7 {transformation and fconfigure} -setup { } -cleanup { catch {close $fd} removeFile $file -} -result {{-blocking 1 -buffering full -buffersize 4096 -encoding binary -encodingprofile tcl8 -eofchar {} -translation lf} {-blocking 1 -buffering full -buffersize 4096 -encoding binary -encodingprofile tcl8 -eofchar {} -translation lf -checksum 0} {-blocking 1 -buffering full -buffersize 4096 -encoding binary -encodingprofile tcl8 -eofchar {} -translation lf}} +} -result {{-blocking 1 -buffering full -buffersize 4096 -encoding binary -profile strict -eofchar {} -translation lf} {-blocking 1 -buffering full -buffersize 4096 -encoding binary -profile strict -eofchar {} -translation lf -checksum 0} {-blocking 1 -buffering full -buffersize 4096 -encoding binary -profile strict -eofchar {} -translation lf}} # Input is headers from fetching SPDY draft # Dictionary is that which is proposed _in_ SPDY draft set spdyHeaders "HTTP/1.0 200 OK\r\nContent-Type: text/html; charset=utf-8\r\nX-Robots-Tag: noarchive\r\nLast-Modified: Tue, 05 Jun 2012 02:43:25 GMT\r\nETag: \"1338864205129|#public|0|en|||0\"\r\nExpires: Tue, 05 Jun 2012 16:17:11 GMT\r\nDate: Tue, 05 Jun 2012 16:17:06 GMT\r\nCache-Control: public, max-age=5\r\nX-Content-Type-Options: nosniff\r\nX-XSS-Protection: 1; mode=block\r\nServer: GSE\r\n" -- cgit v0.12 From 5669c89824bbcb01904dc6fde19a8e5713abd4a6 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Wed, 8 Mar 2023 22:04:09 +0000 Subject: Oops --- generic/tclIO.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index f3c8480..53213b8 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -4608,7 +4608,6 @@ Tcl_GetsObj( /* State info for channel */ ChannelBuffer *bufPtr; int inEofChar, skip, copiedTotal, oldFlags, oldRemoved; - int reportError = 0; Tcl_Size oldLength; Tcl_Encoding encoding; char *dst, *dstEnd, *eol, *eof; @@ -4890,7 +4889,6 @@ Tcl_GetsObj( * point, if desired. */ eol = dstEnd; - reportError = 1; goto gotEOL; } dst = dstEnd; @@ -10206,7 +10204,7 @@ DoRead( if (GotFlag(statePtr, CHANNEL_ENCODING_ERROR) && ((p == dst) || !GotFlag(statePtr, CHANNEL_NONBLOCKING))) { Tcl_SetErrno(EILSEQ); - if (!copied) { + if (p == dst) { p = dst - 1; } } -- cgit v0.12 From 2ff0d1c5c1dfa32d96b3d627878eedb04c72b18f Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Thu, 9 Mar 2023 16:20:30 +0000 Subject: Bug-fix for Utf32ToUtfProc, in case TCL_UTF_MAX=3 --- generic/tclEncoding.c | 55 ++++++++++++++++++++++++++++++++++++--------- library/tcltest/tcltest.tcl | 16 ++++++------- tests/io.test | 3 ++- 3 files changed, 54 insertions(+), 20 deletions(-) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 267a667..dacc263 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -2712,7 +2712,7 @@ Utf32ToUtfProc( const char *srcStart, *srcEnd; const char *dstEnd, *dstStart; int result, numChars, charLimit = INT_MAX; - int ch, bytesLeft = srcLen % 4; + int ch = 0, bytesLeft = srcLen % 4; flags |= PTR2INT(clientData); if (flags & TCL_ENCODING_CHAR_LIMIT) { @@ -2729,6 +2729,21 @@ Utf32ToUtfProc( srcLen -= bytesLeft; } +#if TCL_UTF_MAX < 4 + /* + * If last code point is a high surrogate, we cannot handle that yet, + * unless we are at the end. + */ + + if (!(flags & TCL_ENCODING_END) && (srcLen >= 4) && + ((src[srcLen - ((flags & TCL_ENCODING_LE)?3:2)] & 0xFC) == 0xD8) && + ((src[srcLen - ((flags & TCL_ENCODING_LE)?2:3)]) == 0) && + ((src[srcLen - ((flags & TCL_ENCODING_LE)?1:4)]) == 0)) { + result = TCL_CONVERT_MULTIBYTE; + srcLen-= 4; + } +#endif + srcStart = src; srcEnd = src + srcLen; @@ -2741,15 +2756,27 @@ Utf32ToUtfProc( break; } +#if TCL_UTF_MAX < 4 + int prev = ch; +#endif if (flags & TCL_ENCODING_LE) { ch = (src[3] & 0xFF) << 24 | (src[2] & 0xFF) << 16 | (src[1] & 0xFF) << 8 | (src[0] & 0xFF); } else { ch = (src[0] & 0xFF) << 24 | (src[1] & 0xFF) << 16 | (src[2] & 0xFF) << 8 | (src[3] & 0xFF); } +#if TCL_UTF_MAX < 4 + if (((prev & ~0x3FF) == 0xD800) && ((ch & ~0x3FF) != 0xDC00)) { + /* Bug [10c2c17c32]. If Hi surrogate not followed by Lo surrogate, finish 3-byte UTF-8 */ + dst += Tcl_UniCharToUtf(-1, dst); + } +#endif if ((unsigned)ch > 0x10FFFF || SURROGATE(ch)) { if (PROFILE_STRICT(flags)) { result = TCL_CONVERT_SYNTAX; +#if TCL_UTF_MAX < 4 + ch = 0; +#endif break; } if (PROFILE_REPLACE(flags)) { @@ -2770,6 +2797,12 @@ Utf32ToUtfProc( src += 4; } +#if TCL_UTF_MAX < 4 + if ((ch & ~0x3FF) == 0xD800) { + /* Bug [10c2c17c32]. If Hi surrogate, finish 3-byte UTF-8 */ + dst += Tcl_UniCharToUtf(-1, dst); + } +#endif /* @@ -2780,16 +2813,16 @@ Utf32ToUtfProc( if (dst > dstEnd) { result = TCL_CONVERT_NOSPACE; } else { - if (PROFILE_STRICT(flags)) { - result = TCL_CONVERT_SYNTAX; - } else { - /* PROFILE_REPLACE or PROFILE_TCL8 */ - result = TCL_OK; - dst += Tcl_UniCharToUtf(UNICODE_REPLACE_CHAR, dst); - numChars++; - src += bytesLeft; /* Go past truncated code unit */ - } - } + if (PROFILE_STRICT(flags)) { + result = TCL_CONVERT_SYNTAX; + } else { + /* PROFILE_REPLACE or PROFILE_TCL8 */ + result = TCL_OK; + dst += Tcl_UniCharToUtf(UNICODE_REPLACE_CHAR, dst); + numChars++; + src += bytesLeft; /* Go past truncated code unit */ + } + } } *srcReadPtr = src - srcStart; diff --git a/library/tcltest/tcltest.tcl b/library/tcltest/tcltest.tcl index 7344f9f..1ba5d9f 100644 --- a/library/tcltest/tcltest.tcl +++ b/library/tcltest/tcltest.tcl @@ -400,7 +400,7 @@ namespace eval tcltest { default { set outputChannel [open $filename a] if {[package vsatisfies [package provide Tcl] 8.7-]} { - fconfigure $outputChannel -encoding utf-8 + fconfigure $outputChannel -profile tcl8 -encoding utf-8 } set ChannelsWeOpened($outputChannel) 1 @@ -447,7 +447,7 @@ namespace eval tcltest { default { set errorChannel [open $filename a] if {[package vsatisfies [package provide Tcl] 8.7-]} { - fconfigure $errorChannel -encoding utf-8 + fconfigure $errorChannel -profile tcl8 -encoding utf-8 } set ChannelsWeOpened($errorChannel) 1 @@ -792,7 +792,7 @@ namespace eval tcltest { if {$Option(-loadfile) eq {}} {return} set tmp [open $Option(-loadfile) r] if {[package vsatisfies [package provide Tcl] 8.7-]} { - fconfigure $tmp -encoding utf-8 + fconfigure $tmp -profile tcl8 -encoding utf-8 } loadScript [read $tmp] close $tmp @@ -1340,7 +1340,7 @@ proc tcltest::DefineConstraintInitializers {} { set code 0 if {![catch {set f [open "|[list [interpreter]]" w]}]} { if {[package vsatisfies [package provide Tcl] 8.7-]} { - fconfigure $f -encoding utf-8 + fconfigure $f -profile tcl8 -encoding utf-8 } if {![catch {puts $f exit}]} { if {![catch {close $f}]} { @@ -2190,7 +2190,7 @@ proc tcltest::test {name description args} { if {[file readable $testFile]} { set testFd [open $testFile r] if {[package vsatisfies [package provide Tcl] 8.7-]} { - fconfigure $testFd -encoding utf-8 + fconfigure $testFd -profile tcl8 -encoding utf-8 } set testLine [expr {[lsearch -regexp \ [split [read $testFd] "\n"] \ @@ -2901,7 +2901,7 @@ proc tcltest::runAllTests { {shell ""} } { incr numTestFiles set pipeFd [open $cmd "r"] if {[package vsatisfies [package provide Tcl] 8.7-]} { - fconfigure $pipeFd -encoding utf-8 + fconfigure $pipeFd -profile tcl8 -encoding utf-8 } while {[gets $pipeFd line] >= 0} { if {[regexp [join { @@ -3101,7 +3101,7 @@ proc tcltest::makeFile {contents name {directory ""}} { set fd [open $fullName w] fconfigure $fd -translation lf if {[package vsatisfies [package provide Tcl] 8.7-]} { - fconfigure $fd -encoding utf-8 + fconfigure $fd -profile tcl8 -encoding utf-8 } if {[string index $contents end] eq "\n"} { puts -nonewline $fd $contents @@ -3252,7 +3252,7 @@ proc tcltest::viewFile {name {directory ""}} { set fullName [file join $directory $name] set f [open $fullName] if {[package vsatisfies [package provide Tcl] 8.7-]} { - fconfigure $f -encoding utf-8 + fconfigure $f -profile tcl8 -encoding utf-8 } set data [read -nonewline $f] close $f diff --git a/tests/io.test b/tests/io.test index 8dde2b2..a8ec7e5 100644 --- a/tests/io.test +++ b/tests/io.test @@ -9292,10 +9292,10 @@ test io-75.12 {invalid utf-8 encoding read is ignored} -setup { fconfigure $f -encoding utf-8 -profile tcl8 -buffering none -eofchar "" -translation lf } -body { set d [read $f] - close $f binary scan $d H* hd set hd } -cleanup { + close $f removeFile io-75.12 } -result 4181 test io-75.13 {invalid utf-8 encoding read is not ignored (-profile strict)} -setup { @@ -9310,6 +9310,7 @@ test io-75.13 {invalid utf-8 encoding read is not ignored (-profile strict)} -se } -body { read $f } -cleanup { + close $f removeFile io-75.13 } -match glob -returnCodes 1 -result {error reading "*": illegal byte sequence} -- cgit v0.12 From 6e644e2a603401e7062f75c483325edf779f497a Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Fri, 10 Mar 2023 15:55:51 +0000 Subject: Implement new function Tcl_InputEncodingError() --- doc/OpenFileChnl.3 | 15 +++++++++++++-- generic/tcl.decls | 5 +++++ generic/tclDecls.h | 8 +++++--- generic/tclIO.c | 26 ++++++++++++++++++++++++++ generic/tclStubInit.c | 2 +- library/http/http.tcl | 27 ++++++++++++++++++++++++--- library/tcltest/tcltest.tcl | 41 +++++++++++++++++++++++++++++++++++++++-- 7 files changed, 113 insertions(+), 11 deletions(-) diff --git a/doc/OpenFileChnl.3 b/doc/OpenFileChnl.3 index 1b9d5d3..cac1723 100644 --- a/doc/OpenFileChnl.3 +++ b/doc/OpenFileChnl.3 @@ -9,7 +9,7 @@ .BS '\" Note: do not modify the .SH NAME line immediately below! .SH NAME -Tcl_OpenFileChannel, Tcl_OpenCommandChannel, Tcl_MakeFileChannel, Tcl_GetChannel, Tcl_GetChannelNames, Tcl_GetChannelNamesEx, Tcl_RegisterChannel, Tcl_UnregisterChannel, Tcl_DetachChannel, Tcl_IsStandardChannel, Tcl_Close, Tcl_ReadChars, Tcl_Read, Tcl_GetsObj, Tcl_Gets, Tcl_WriteObj, Tcl_WriteChars, Tcl_Write, Tcl_Flush, Tcl_Seek, Tcl_Tell, Tcl_TruncateChannel, Tcl_GetChannelOption, Tcl_SetChannelOption, Tcl_Eof, Tcl_InputBlocked, Tcl_InputBuffered, Tcl_OutputBuffered, Tcl_Ungets, Tcl_ReadRaw, Tcl_WriteRaw \- buffered I/O facilities using channels +Tcl_OpenFileChannel, Tcl_OpenCommandChannel, Tcl_MakeFileChannel, Tcl_GetChannel, Tcl_GetChannelNames, Tcl_GetChannelNamesEx, Tcl_RegisterChannel, Tcl_UnregisterChannel, Tcl_DetachChannel, Tcl_IsStandardChannel, Tcl_Close, Tcl_ReadChars, Tcl_Read, Tcl_GetsObj, Tcl_Gets, Tcl_WriteObj, Tcl_WriteChars, Tcl_Write, Tcl_Flush, Tcl_Seek, Tcl_Tell, Tcl_TruncateChannel, Tcl_GetChannelOption, Tcl_SetChannelOption, Tcl_Eof, Tcl_InputBlocked, Tcl_InputBuffered, Tcl_InputEncodingError, Tcl_OutputBuffered, Tcl_Ungets, Tcl_ReadRaw, Tcl_WriteRaw \- buffered I/O facilities using channels .SH SYNOPSIS .nf \fB#include \fR @@ -90,6 +90,9 @@ int \fBTcl_InputBuffered\fR(\fIchannel\fR) .sp int +\fBTcl_InputEncodingError\fR(\fIchannel\fR) +.sp +int \fBTcl_OutputBuffered\fR(\fIchannel\fR) .sp long long @@ -476,12 +479,20 @@ that can be retrieved with \fBTcl_GetErrno\fR. \fBTcl_GetsObj\fR also returns TCL_INDEX_NONE if the end of the file is reached; the \fBTcl_Eof\fR procedure can be used to distinguish an error from an end-of-file condition. .PP -If the channel is in nonblocking mode, the return value can also be TCL_INDEX_NONE +If the channel is in blocking mode, the return value can also be TCL_INDEX_NONE if no data was available or the data that was available did not contain an end-of-line character. When TCL_INDEX_NONE is returned, the \fBTcl_InputBlocked\fR procedure may be invoked to determine if the channel is blocked because of input unavailability. .PP +If the channel is in blocking mode, it might be that there is data available +but - at the same time - an encoding error occurred. In that case, the +POSIX error EILSEQ will be recorded, but - since \fBTcl_Gets\fR/\fBTcl_Read\fR +didn't return TCL_INDEX_NONE we cannot be sure if the POSIX error +maybe was a left-over from an earlier error. The only way to be sure +is calling the \fBTcl_InputEncodingError\fR procedure, it will +return 1 if the channel is at an encoding error position. +.PP \fBTcl_Gets\fR is the same as \fBTcl_GetsObj\fR except the resulting characters are appended to the dynamic string given by \fIlineRead\fR rather than a Tcl value. diff --git a/generic/tcl.decls b/generic/tcl.decls index 1608a88..403dc38 100644 --- a/generic/tcl.decls +++ b/generic/tcl.decls @@ -2626,6 +2626,11 @@ declare 685 { Tcl_Obj *Tcl_DStringToObj(Tcl_DString *dsPtr) } +# TIP 657 +declare 686 { + int Tcl_InputEncodingError(Tcl_Channel chan) +} + # ----- BASELINE -- FOR -- 8.7.0 / 9.0.0 ----- # declare 687 { diff --git a/generic/tclDecls.h b/generic/tclDecls.h index ec9a49a..99661f4 100644 --- a/generic/tclDecls.h +++ b/generic/tclDecls.h @@ -1861,7 +1861,8 @@ EXTERN int Tcl_GetWideUIntFromObj(Tcl_Interp *interp, Tcl_Obj *objPtr, Tcl_WideUInt *uwidePtr); /* 685 */ EXTERN Tcl_Obj * Tcl_DStringToObj(Tcl_DString *dsPtr); -/* Slot 686 is reserved */ +/* 686 */ +EXTERN int Tcl_InputEncodingError(Tcl_Channel chan); /* 687 */ EXTERN void TclUnusedStubEntry(void); @@ -2561,7 +2562,7 @@ typedef struct TclStubs { Tcl_Size (*tcl_GetEncodingNulLength) (Tcl_Encoding encoding); /* 683 */ int (*tcl_GetWideUIntFromObj) (Tcl_Interp *interp, Tcl_Obj *objPtr, Tcl_WideUInt *uwidePtr); /* 684 */ Tcl_Obj * (*tcl_DStringToObj) (Tcl_DString *dsPtr); /* 685 */ - void (*reserved686)(void); + int (*tcl_InputEncodingError) (Tcl_Channel chan); /* 686 */ void (*tclUnusedStubEntry) (void); /* 687 */ } TclStubs; @@ -3887,7 +3888,8 @@ extern const TclStubs *tclStubsPtr; (tclStubsPtr->tcl_GetWideUIntFromObj) /* 684 */ #define Tcl_DStringToObj \ (tclStubsPtr->tcl_DStringToObj) /* 685 */ -/* Slot 686 is reserved */ +#define Tcl_InputEncodingError \ + (tclStubsPtr->tcl_InputEncodingError) /* 686 */ #define TclUnusedStubEntry \ (tclStubsPtr->tclUnusedStubEntry) /* 687 */ diff --git a/generic/tclIO.c b/generic/tclIO.c index 53213b8..0d6c108 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -7615,6 +7615,32 @@ Tcl_InputBuffered( return bytesBuffered; } + +/* + *---------------------------------------------------------------------- + * + * Tcl_InputEncodingError -- + * + * Returns 1 if input is in an encoding error position, 0 otherwise. + * + * Results: + * 0 or 1, always. + * + * Side effects: + * None. + * + *---------------------------------------------------------------------- + */ + +int +Tcl_InputEncodingError( + Tcl_Channel chan) /* Is this channel blocked? */ +{ + ChannelState *statePtr = ((Channel *) chan)->state; + /* State of real channel structure. */ + + return GotFlag(statePtr, CHANNEL_ENCODING_ERROR) ? 1 : 0; +} /* *---------------------------------------------------------------------- diff --git a/generic/tclStubInit.c b/generic/tclStubInit.c index dbd8b52..05f0ac7 100644 --- a/generic/tclStubInit.c +++ b/generic/tclStubInit.c @@ -1492,7 +1492,7 @@ const TclStubs tclStubs = { Tcl_GetEncodingNulLength, /* 683 */ Tcl_GetWideUIntFromObj, /* 684 */ Tcl_DStringToObj, /* 685 */ - 0, /* 686 */ + Tcl_InputEncodingError, /* 686 */ TclUnusedStubEntry, /* 687 */ }; diff --git a/library/http/http.tcl b/library/http/http.tcl index 88f66eb..fb49954 100644 --- a/library/http/http.tcl +++ b/library/http/http.tcl @@ -1746,6 +1746,9 @@ proc http::OpenSocket {token DoLater} { } fconfigure $sock -translation {auto crlf} \ -buffersize $state(-blocksize) + if {[package vsatisfies [package provide Tcl] 9.0-]} { + fconfigure $sock -profile replace \ + } ##Log socket opened, DONE fconfigure - token $token } @@ -2164,6 +2167,9 @@ proc http::Connected {token proto phost srvurl} { lassign [fconfigure $sock -translation] trRead trWrite fconfigure $sock -translation [list $trRead crlf] \ -buffersize $state(-blocksize) + if {[package vsatisfies [package provide Tcl] 9.0-]} { + fconfigure $sock -profile replace \ + } # The following is disallowed in safe interpreters, but the socket is # already in non-blocking mode in that case. @@ -2554,6 +2560,9 @@ proc http::ReceiveResponse {token} { lassign [fconfigure $sock -translation] trRead trWrite fconfigure $sock -translation [list auto $trWrite] \ -buffersize $state(-blocksize) + if {[package vsatisfies [package provide Tcl] 9.0-]} { + fconfigure $sock -profile replace \ + } Log ^D$tk begin receiving response - token $token coroutine ${token}--EventCoroutine http::Event $sock $token @@ -4545,7 +4554,11 @@ proc http::Eot {token {reason {}}} { set enc [CharsetToEncoding $state(charset)] if {$enc ne "binary"} { - set state(body) [encoding convertfrom -profile tcl8 $enc $state(body)] + if {[package vsatisfies [package provide Tcl] 9.0-]} { + set state(body) [encoding convertfrom -profile replace $enc $state(body)] + } else { + set state(body) [encoding convertfrom $enc $state(body)] + } } # Translate text line endings. @@ -4628,7 +4641,11 @@ proc http::GuessType {token} { if {$enc eq "binary"} { return 0 } - set state(body) [encoding convertfrom -profile tcl8 $enc $state(body)] + if {[package vsatisfies [package provide Tcl] 9.0-]} { + set state(body) [encoding convertfrom -profile replace $enc $state(body)] + } else { + set state(body) [encoding convertfrom -profile replace $enc $state(body)] + } set state(body) [string map {\r\n \n \r \n} $state(body)] set state(type) application/xml set state(binary) 0 @@ -4709,7 +4726,11 @@ proc http::quoteString {string} { # a pre-computed map and [string map] to do the conversion (much faster # than [regsub]/[subst]). [Bug 1020491] - set string [encoding convertto -profile tcl8 $http(-urlencoding) $string] + if {[package vsatisfies [package provide Tcl] 9.0-]} { + set string [encoding convertto -profile replace $http(-urlencoding) $string] + } else { + set string [encoding convertto $http(-urlencoding) $string] + } return [string map $formMap $string] } diff --git a/library/tcltest/tcltest.tcl b/library/tcltest/tcltest.tcl index 1ba5d9f..12791da 100644 --- a/library/tcltest/tcltest.tcl +++ b/library/tcltest/tcltest.tcl @@ -1134,6 +1134,39 @@ proc tcltest::SafeFetch {n1 n2 op} { } } + +# tcltest::Asciify -- +# +# Transforms the passed string to contain only printable ascii characters. +# Useful for printing to terminals. Non-printables are mapped to +# \x, \u or \U sequences. +# +# Arguments: +# s - string to transform +# +# Results: +# The transformed strings +# +# Side effects: +# None. + +proc tcltest::Asciify {s} { + set print "" + foreach c [split $s ""] { + set i [scan $c %c] + if {[string is print $c] && ($i <= 127)} { + append print $c + } elseif {$i <= 0xff} { + append print \\x[format %02X $i] + } elseif {$i <= 0xffff} { + append print \\u[format %04X $i] + } else { + append print \\U[format %08X $i] + } + } + return $print +} + # tcltest::ConstraintInitializer -- # # Get or set a script that when evaluated in the tcltest namespace @@ -2221,9 +2254,13 @@ proc tcltest::test {name description args} { if {$scriptCompare} { puts [outputChannel] "---- Error testing result: $scriptMatch" } else { - puts [outputChannel] "---- Result was:\n$actualAnswer" + if {[catch { + puts [outputChannel] "---- Result was:\n[Asciify $actualAnswer]" + } errMsg]} { + puts [outputChannel] "\n---- Result was:\n" + } puts [outputChannel] "---- Result should have been\ - ($match matching):\n$result" + ($match matching):\n[Asciify $result]" } } if {$errorCodeFailure} { -- cgit v0.12 From 9e4ce6c3b9c56c4d2bd3e8268208716eeeeaf764 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Sat, 11 Mar 2023 21:19:46 +0000 Subject: Fix last (hopefully) bugs in utf-16/utf-32 encoders --- generic/tclEncoding.c | 64 +++++++++++++++++++++++++++------------------------ tests/chanio.test | 2 +- tests/encoding.test | 10 +------- tests/io.test | 2 +- 4 files changed, 37 insertions(+), 41 deletions(-) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index ac65f49..609ddad 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -2723,8 +2723,8 @@ Utf32ToUtfProc( /* * Check alignment with utf-32 (4 == sizeof(UTF-32)) */ + if (bytesLeft != 0) { - /* We have a truncated code unit */ result = TCL_CONVERT_MULTIBYTE; srcLen -= bytesLeft; } @@ -2771,7 +2771,13 @@ Utf32ToUtfProc( } #endif - if ((unsigned)ch > 0x10FFFF || SURROGATE(ch)) { + if ((unsigned)ch > 0x10FFFF) { + ch = 0xFFFD; + if (PROFILE_STRICT(flags)) { + result = TCL_CONVERT_SYNTAX; + break; + } + } else if (SURROGATE(ch)) { if (PROFILE_STRICT(flags)) { result = TCL_CONVERT_SYNTAX; #if TCL_UTF_MAX < 4 @@ -2794,7 +2800,7 @@ Utf32ToUtfProc( } else { dst += Tcl_UniCharToUtf(ch, dst); } - src += 4; + src += sizeof(unsigned int); } #if TCL_UTF_MAX < 4 @@ -2804,27 +2810,22 @@ Utf32ToUtfProc( } #endif - - /* - * If we had a truncated code unit at the end AND this is the last - * fragment AND profile is not "strict", stick FFFD in its place. - */ if ((flags & TCL_ENCODING_END) && (result == TCL_CONVERT_MULTIBYTE)) { + /* We have a single byte left-over at the end */ if (dst > dstEnd) { result = TCL_CONVERT_NOSPACE; } else { + /* destination is not full, so we really are at the end now */ if (PROFILE_STRICT(flags)) { result = TCL_CONVERT_SYNTAX; } else { - /* PROFILE_REPLACE or PROFILE_TCL8 */ result = TCL_OK; - dst += Tcl_UniCharToUtf(UNICODE_REPLACE_CHAR, dst); + dst += Tcl_UniCharToUtf(0xFFFD, dst); numChars++; - src += bytesLeft; /* Go past truncated code unit */ + src += bytesLeft; } } } - *srcReadPtr = src - srcStart; *dstWrotePtr = dst - dstStart; *dstCharsPtr = numChars; @@ -3019,6 +3020,12 @@ Utf16ToUtfProc( ch = (src[0] & 0xFF) << 8 | (src[1] & 0xFF); } if (((prev & ~0x3FF) == 0xD800) && ((ch & ~0x3FF) != 0xDC00)) { + if (PROFILE_STRICT(flags)) { + result = TCL_CONVERT_UNKNOWN; + src -= 2; /* Go back to before the high surrogate */ + dst--; /* Also undo writing a single byte too much */ + break; + } /* Bug [10c2c17c32]. If Hi surrogate not followed by Lo surrogate, finish 3-byte UTF-8 */ dst += Tcl_UniCharToUtf(-1, dst); } @@ -3028,10 +3035,12 @@ Utf16ToUtfProc( * unsigned short-size data. */ - if (ch && ch < 0x80) { + if ((unsigned)ch - 1 < 0x7F) { *dst++ = (ch & 0xFF); - } else { + } else if (((prev & ~0x3FF) == 0xD800) || ((ch & ~0x3FF) == 0xD800)) { dst += Tcl_UniCharToUtf(ch | TCL_COMBINE, dst); + } else { + dst += Tcl_UniCharToUtf(ch, dst); } src += sizeof(unsigned short); } @@ -3040,27 +3049,22 @@ Utf16ToUtfProc( /* Bug [10c2c17c32]. If Hi surrogate, finish 3-byte UTF-8 */ dst += Tcl_UniCharToUtf(-1, dst); } - - /* - * If we had a truncated code unit at the end AND this is the last - * fragment AND profile is not "strict", stick FFFD in its place. - */ if ((flags & TCL_ENCODING_END) && (result == TCL_CONVERT_MULTIBYTE)) { + /* We have a single byte left-over at the end */ if (dst > dstEnd) { result = TCL_CONVERT_NOSPACE; } else { - if (PROFILE_STRICT(flags)) { - result = TCL_CONVERT_SYNTAX; - } else { - /* PROFILE_REPLACE or PROFILE_TCL8 */ - result = TCL_OK; - dst += Tcl_UniCharToUtf(UNICODE_REPLACE_CHAR, dst); - numChars++; - src++; /* Go past truncated code unit */ - } - } + if (PROFILE_STRICT(flags)) { + result = TCL_CONVERT_SYNTAX; + } else { + /* PROFILE_REPLACE or PROFILE_TCL8 */ + result = TCL_OK; + dst += Tcl_UniCharToUtf(UNICODE_REPLACE_CHAR, dst); + numChars++; + src++; /* Go past truncated code unit */ + } + } } - *srcReadPtr = src - srcStart; *dstWrotePtr = dst - dstStart; *dstCharsPtr = numChars; diff --git a/tests/chanio.test b/tests/chanio.test index a065fde..ee6133e 100644 --- a/tests/chanio.test +++ b/tests/chanio.test @@ -4982,7 +4982,7 @@ test chan-io-38.2 {Tcl_SetChannelBufferSize, Tcl_GetChannelBufferSize} -setup { test chan-io-38.3 {Tcl_SetChannelBufferSize, changing buffersize between reads} { # This test crashes the interp if Bug #427196 is not fixed set chan [open [info script] r] - chan configure $chan -buffersize 10 + chan configure $chan -buffersize 10 -encoding utf-8 set var [chan read $chan 2] chan configure $chan -buffersize 32 append var [chan read $chan] diff --git a/tests/encoding.test b/tests/encoding.test index df67af8..d954870 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -511,11 +511,9 @@ test encoding-16.7 {Utf32ToUtfProc} -body { set val [encoding convertfrom utf-32be \0\0NN] list $val [format %x [scan $val %c]] } -result "乎 4e4e" -test encoding-16.8 {Utf32ToUtfProc} -constraints knownBug -body { +test encoding-16.8 {Utf32ToUtfProc} -body { set val [encoding convertfrom -profile tcl8 utf-32 \x41\x00\x00\x41] list $val [format %x [scan $val %c]] -} -constraints { - encodingProfileTodo } -result "\uFFFD fffd" test encoding-16.9 {Utf32ToUtfProc} -constraints utf32 -body { encoding convertfrom -profile tcl8 utf-32le \x00\xD8\x00\x00 @@ -607,8 +605,6 @@ test encoding-17.10 {Utf32ToUtfProc} -body { test encoding-18.1 {TableToUtfProc on invalid input} -body { list [catch {encoding convertto jis0208 \\} res] $res -} -constraints { - encodingProfileTodo } -result {1 {unexpected character at index 0: 'U+00005C'}} test encoding-18.2 {TableToUtfProc on invalid input with -profile strict} -body { list [catch {encoding convertto -profile strict jis0208 \\} res] $res @@ -798,8 +794,6 @@ test encoding-24.18 {Parse valid or invalid utf-8} -constraints testbytestring - } -result "Z\xC3\xA0\xE2\x82\xACxxxxxx" test encoding-24.19 {Parse valid or invalid utf-8} -body { encoding convertto utf-8 "ZX\uD800" -} -constraints { - encodingProfileTodo } -returnCodes 1 -match glob -result "unexpected character at index 2: 'U+00D800'" test encoding-24.20 {Parse with -profile tcl8 but without providing encoding} -body { encoding convertfrom -profile tcl8 "\x20" @@ -857,8 +851,6 @@ test encoding-24.37 {Parse invalid utf-8 with -profile tcl8} -body { } -result \uD800 test encoding-24.38 {Try to generate invalid utf-8} -body { encoding convertto utf-8 \uD800 -} -constraints { - encodingProfileTodo } -returnCodes 1 -result {unexpected character at index 0: 'U+00D800'} test encoding-24.39 {Try to generate invalid utf-8 with -profile strict} -body { encoding convertto -profile strict utf-8 \uD800 diff --git a/tests/io.test b/tests/io.test index a8ec7e5..b077c52 100644 --- a/tests/io.test +++ b/tests/io.test @@ -5541,7 +5541,7 @@ test io-38.3 {Tcl_SetChannelBufferSize, changing buffersize between reads} { # This test crashes the interp if Bug #427196 is not fixed set chan [open [info script] r] - fconfigure $chan -buffersize 10 + fconfigure $chan -buffersize 10 -encoding utf-8 set var [read $chan 2] fconfigure $chan -buffersize 32 append var [read $chan] -- cgit v0.12 From f5bd004df9a90f12fba3280692ffefd5ea3c9188 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Sun, 12 Mar 2023 11:51:17 +0000 Subject: Make testcase io-53.5 independant on system encoding --- tests/io.test | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/io.test b/tests/io.test index b077c52..795d91e 100644 --- a/tests/io.test +++ b/tests/io.test @@ -7919,6 +7919,8 @@ test io-53.5 {CopyData: error during fcopy} {socket fcopy} { set out [socket 127.0.0.1 [lindex [fconfigure $listen -sockname] 2]] catch {unset fcopyTestDone} close $listen ;# This means the socket open never really succeeds + fconfigure $in -encoding utf-8 + fconfigure $out -encoding utf-8 fcopy $in $out -command [namespace code FcopyTestDone] variable fcopyTestDone if {![info exists fcopyTestDone]} { -- cgit v0.12 From c43f6e9701ac22b32b3d075413317e79e8c8057b Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Sun, 12 Mar 2023 13:45:51 +0000 Subject: More utf-16 bugfixing --- generic/tclEncoding.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 806a052..4fc4cbd 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -3024,6 +3024,7 @@ Utf16ToUtfProc( result = TCL_CONVERT_UNKNOWN; src -= 2; /* Go back to before the high surrogate */ dst--; /* Also undo writing a single byte too much */ + numChars--; break; } /* Bug [10c2c17c32]. If Hi surrogate not followed by Lo surrogate, finish 3-byte UTF-8 */ @@ -3039,6 +3040,10 @@ Utf16ToUtfProc( *dst++ = (ch & 0xFF); } else if (((prev & ~0x3FF) == 0xD800) || ((ch & ~0x3FF) == 0xD800)) { dst += Tcl_UniCharToUtf(ch | TCL_COMBINE, dst); + } else if (((ch & ~0x3FF) == 0xDC00) && PROFILE_STRICT(flags)) { + /* Lo surrogate not preceded by Hi surrogate */ + result = TCL_CONVERT_UNKNOWN; + break; } else { dst += Tcl_UniCharToUtf(ch, dst); } @@ -3046,8 +3051,15 @@ Utf16ToUtfProc( } if ((ch & ~0x3FF) == 0xD800) { - /* Bug [10c2c17c32]. If Hi surrogate, finish 3-byte UTF-8 */ - dst += Tcl_UniCharToUtf(-1, dst); + if ((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT) { + result = TCL_CONVERT_UNKNOWN; + src -= 2; + dst--; + numChars--; + } else { + /* Bug [10c2c17c32]. If Hi surrogate, finish 3-byte UTF-8 */ + dst += Tcl_UniCharToUtf(-1, dst); + } } if ((flags & TCL_ENCODING_END) && (result == TCL_CONVERT_MULTIBYTE)) { /* We have a single byte left-over at the end */ -- cgit v0.12 From 362a0e8ba6f8c6e7c937982a09b164ddf488caeb Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Mon, 13 Mar 2023 17:24:22 +0000 Subject: Adapt more test expectation (since the default is now -profile strict) --- tests/encoding.test | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/encoding.test b/tests/encoding.test index d954870..91cd8ff 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -492,7 +492,7 @@ test encoding-16.2 {Utf16ToUtfProc} -body { list $val [format %x [scan $val %c]] } -result "\U460DC 460dc" test encoding-16.3 {Utf16ToUtfProc} -body { - set val [encoding convertfrom utf-16 "\xDC\xDC"] + set val [encoding convertfrom -profile tcl8 utf-16 "\xDC\xDC"] list $val [format %x [scan $val %c]] } -result "\uDCDC dcdc" test encoding-16.4 {Ucs2ToUtfProc} -body { @@ -528,16 +528,16 @@ test encoding-16.12 {Utf32ToUtfProc} -constraints utf32 -body { encoding convertfrom -profile tcl8 utf-32le \x00\xDC\x00\x00\x00\xD8\x00\x00 } -result \uDC00\uD800 test encoding-16.13 {Utf16ToUtfProc} -body { - encoding convertfrom utf-16le \x00\xD8 + encoding convertfrom -profile tcl8 utf-16le \x00\xD8 } -result \uD800 test encoding-16.14 {Utf16ToUtfProc} -body { - encoding convertfrom utf-16le \x00\xDC + encoding convertfrom -profile tcl8 utf-16le \x00\xDC } -result \uDC00 test encoding-16.15 {Utf16ToUtfProc} -body { encoding convertfrom utf-16le \x00\xD8\x00\xDC } -result \U010000 test encoding-16.16 {Utf16ToUtfProc} -body { - encoding convertfrom utf-16le \x00\xDC\x00\xD8 + encoding convertfrom -profile tcl8 utf-16le \x00\xDC\x00\xD8 } -result \uDC00\uD800 test encoding-16.17 {Utf32ToUtfProc} -body { list [encoding convertfrom -profile strict -failindex idx utf-32le \x41\x00\x00\x00\x00\xD8\x00\x00\x42\x00\x00\x00] [set idx] -- cgit v0.12 From d023df86238ba0a0020a0ddc064eb076dcac6702 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Tue, 14 Mar 2023 20:24:27 +0000 Subject: Implement return options for read/gets --- generic/tclIO.c | 12 +++++++++--- generic/tclIOCmd.c | 33 ++++++++++++++++++++++++++------- tests/io.test | 44 ++++++++++++++++++++++++++++++++++++++++++++ tests/winConsole.test | 4 ++-- 4 files changed, 81 insertions(+), 12 deletions(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index 0d6c108..07bb15d 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -5006,6 +5006,13 @@ Tcl_GetsObj( } UpdateInterest(chanPtr); TclChannelRelease((Tcl_Channel)chanPtr); + if (GotFlag(statePtr, CHANNEL_ENCODING_ERROR) && + (copiedTotal == 0 || !GotFlag(statePtr, CHANNEL_NONBLOCKING))) { + Tcl_SetErrno(EILSEQ); + if (copiedTotal == 0) { + copiedTotal = -1; + } + } return copiedTotal; } @@ -6056,7 +6063,6 @@ DoReadChars( * like [read] can return an error. */ Tcl_SetErrno(EILSEQ); - copied = -1; goto finish; } } @@ -10231,11 +10237,11 @@ DoRead( && ((p == dst) || !GotFlag(statePtr, CHANNEL_NONBLOCKING))) { Tcl_SetErrno(EILSEQ); if (p == dst) { - p = dst - 1; + p = dst - 1; } } TclChannelRelease((Tcl_Channel)chanPtr); - return (int)(p - dst); + return (Tcl_Size)(p - dst); } /* diff --git a/generic/tclIOCmd.c b/generic/tclIOCmd.c index 6ec5891..29e52fb 100644 --- a/generic/tclIOCmd.c +++ b/generic/tclIOCmd.c @@ -315,14 +315,22 @@ Tcl_GetsObjCmd( */ if (!TclChanCaughtErrorBypass(interp, chan)) { - Tcl_SetObjResult(interp, Tcl_ObjPrintf( - "error reading \"%s\": %s", - TclGetString(chanObjPtr), Tcl_PosixError(interp))); + goto getsError; } code = TCL_ERROR; goto done; } lineLen = TCL_IO_FAILURE; + } else if (Tcl_InputEncodingError(chan)) { + Tcl_Obj *returnOpts = Tcl_NewDictObj(); + Tcl_DictObjPut(NULL, returnOpts, Tcl_NewStringObj("-data", TCL_INDEX_NONE), linePtr); + Tcl_SetReturnOptions(interp, returnOpts); + getsError: + Tcl_SetObjResult(interp, Tcl_ObjPrintf( + "error reading \"%s\": %s", + TclGetString(chanObjPtr), Tcl_PosixError(interp))); + code = TCL_ERROR; + goto done; } if (objc == 3) { if (Tcl_ObjSetVar2(interp, objv[2], NULL, linePtr, @@ -438,13 +446,24 @@ Tcl_ReadObjCmd( * regular message if nothing was found in the bypass. */ + Tcl_DecrRefCount(resultPtr); if (!TclChanCaughtErrorBypass(interp, chan)) { - Tcl_SetObjResult(interp, Tcl_ObjPrintf( - "error reading \"%s\": %s", - TclGetString(chanObjPtr), Tcl_PosixError(interp))); + Tcl_SetObjResult(interp, Tcl_ObjPrintf( + "error reading \"%s\": %s", + TclGetString(chanObjPtr), Tcl_PosixError(interp))); + goto readError; } TclChannelRelease(chan); - Tcl_DecrRefCount(resultPtr); + return TCL_ERROR; + } else if (Tcl_InputEncodingError(chan)) { + Tcl_Obj *returnOpts = Tcl_NewDictObj(); + Tcl_DictObjPut(NULL, returnOpts, Tcl_NewStringObj("-data", TCL_INDEX_NONE), resultPtr); + Tcl_SetObjResult(interp, Tcl_ObjPrintf( + "error reading \"%s\": %s", + TclGetString(chanObjPtr), Tcl_PosixError(interp))); + Tcl_SetReturnOptions(interp, returnOpts); + readError: + TclChannelRelease(chan); return TCL_ERROR; } diff --git a/tests/io.test b/tests/io.test index 454f5a4..b74423c 100644 --- a/tests/io.test +++ b/tests/io.test @@ -9216,6 +9216,50 @@ test io-75.5 {invalid utf-8 encoding read is ignored (-profile tcl8)} -setup { removeFile io-75.5 } -result 4181 +test io-75.6 {invalid utf-8 encoding read is not ignored (-profile strict)} -setup { + set fn [makeFile {} io-75.6] + set f [open $fn w+] + fconfigure $f -encoding binary + # \x81 is invalid in utf-8 + puts -nonewline $f A\x81 + flush $f + seek $f 0 + fconfigure $f -encoding utf-8 -buffering none -eofchar "" -translation lf -profile strict +} -body { + try { + read $f + } on error {result options} { + set data [dict get $options -data] + } + lappend data $result +} -cleanup { + close $f + removeFile io-75.6 +} -match glob -result {A {error reading "*": illegal byte sequence}} + +test io-75.7 {invalid utf-8 encoding eof handling (-profile strict)} -setup { + set fn [makeFile {} io-75.7] + set f [open $fn w+] + fconfigure $f -encoding binary + # \x81 is invalid in utf-8 + puts -nonewline $f A\x81 + flush $f + seek $f 0 + fconfigure $f -encoding utf-8 -buffering none -eofchar "" -translation lf -profile strict +} -body { + try { + read $f + } on error {result options} { + set data [dict get $options -data] + } + lappend data $result + fconfigure $f -encoding iso8859-1 + lappend data [read $f] +} -cleanup { + close $f + removeFile io-75.7 +} -match glob -result "A {error reading \"*\": illegal byte sequence} \x81" + test io-75.8 {invalid utf-8 encoding eof handling (-profile strict)} -setup { set fn [makeFile {} io-75.8] set f [open $fn w+] diff --git a/tests/winConsole.test b/tests/winConsole.test index ede6e92..4eccf81 100644 --- a/tests/winConsole.test +++ b/tests/winConsole.test @@ -198,7 +198,7 @@ test console-fconfigure-get-1.0 { Console get stdin configuration } -constraints {win interactive} -body { lsort [dict keys [fconfigure stdin]] -} -result {-blocking -buffering -buffersize -encoding -profile -eofchar -inputmode -translation} +} -result {-blocking -buffering -buffersize -encoding -eofchar -profile -inputmode -translation} set testnum 0 foreach {opt result} { @@ -232,7 +232,7 @@ foreach chan {stdout stderr} major {2 3} { win interactive } -body { lsort [dict keys [fconfigure $chan]] - } -result {-blocking -buffering -buffersize -encoding -profile -eofchar -translation -winsize} + } -result {-blocking -buffering -buffersize -encoding -eofchar -profile -translation -winsize} set testnum 0 foreach {opt result} { -blocking 1 -- cgit v0.12 From 3ac0d72dd626a276424a1589dbe15228fc35615c Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Thu, 16 Mar 2023 08:22:35 +0000 Subject: Allow -encoding to be shortened (again) --- generic/tclIO.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index 9a846da..0fec0f2 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -7990,7 +7990,7 @@ Tcl_GetChannelOption( return TCL_OK; } } - if (len == 0 || HaveOpt(8, "-encoding")) { + if (len == 0 || HaveOpt(2, "-encoding")) { if (len == 0) { Tcl_DStringAppendElement(dsPtr, "-encoding"); } -- cgit v0.12 From b6ccec9b3f11c4ad0aab561a0a86ec3320e8ee07 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Thu, 16 Mar 2023 21:03:28 +0000 Subject: Don't reset CHANNEL_ENCODING_ERROR here, otherwise Tcl_InputEncodingError() will give wrong result --- generic/tclIO.c | 1 - 1 file changed, 1 deletion(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index 49500e3..d013679 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -4618,7 +4618,6 @@ Tcl_GetsObj( if (GotFlag(statePtr, CHANNEL_ENCODING_ERROR)) { UpdateInterest(chanPtr); Tcl_SetErrno(EILSEQ); - ResetFlag(statePtr, CHANNEL_ENCODING_ERROR); return TCL_INDEX_NONE; } -- cgit v0.12 From 0625218c8505d265ee7d2da3d8c7f7aad6879cf7 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Sat, 18 Mar 2023 13:36:10 +0000 Subject: See if less "-profile replace" suffices --- library/http/http.tcl | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/library/http/http.tcl b/library/http/http.tcl index 3410c46..c730eeb 100644 --- a/library/http/http.tcl +++ b/library/http/http.tcl @@ -1746,9 +1746,6 @@ proc http::OpenSocket {token DoLater} { } fconfigure $sock -translation {auto crlf} \ -buffersize $state(-blocksize) - if {[package vsatisfies [package provide Tcl] 9.0-]} { - fconfigure $sock -profile replace \ - } ##Log socket opened, DONE fconfigure - token $token } @@ -2167,9 +2164,6 @@ proc http::Connected {token proto phost srvurl} { lassign [fconfigure $sock -translation] trRead trWrite fconfigure $sock -translation [list $trRead crlf] \ -buffersize $state(-blocksize) - if {[package vsatisfies [package provide Tcl] 9.0-]} { - fconfigure $sock -profile replace \ - } # The following is disallowed in safe interpreters, but the socket is # already in non-blocking mode in that case. @@ -2560,9 +2554,6 @@ proc http::ReceiveResponse {token} { lassign [fconfigure $sock -translation] trRead trWrite fconfigure $sock -translation [list auto $trWrite] \ -buffersize $state(-blocksize) - if {[package vsatisfies [package provide Tcl] 9.0-]} { - fconfigure $sock -profile replace \ - } Log ^D$tk begin receiving response - token $token coroutine ${token}--EventCoroutine http::Event $sock $token @@ -4726,11 +4717,7 @@ proc http::quoteString {string} { # a pre-computed map and [string map] to do the conversion (much faster # than [regsub]/[subst]). [Bug 1020491] - if {[package vsatisfies [package provide Tcl] 9.0-]} { - set string [encoding convertto -profile replace $http(-urlencoding) $string] - } else { - set string [encoding convertto $http(-urlencoding) $string] - } + set string [encoding convertto $http(-urlencoding) $string] return [string map $formMap $string] } -- cgit v0.12 From 34ecddb6102a17c7771e30f8b9bb559adc312ea3 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Tue, 21 Mar 2023 17:47:33 +0000 Subject: exchange profile <-> eofchar output in "fconfigure". Fix some testcases, which depend on profile --- generic/tclIO.c | 28 ++++++++++++++-------------- tests/chanio.test | 2 +- tests/io.test | 2 +- tests/ioCmd.test | 12 ++++++------ tests/zlib.test | 4 ++-- 5 files changed, 24 insertions(+), 24 deletions(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index d013679..877e670 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -8005,6 +8005,20 @@ Tcl_GetChannelOption( return TCL_OK; } } + if (len == 0 || HaveOpt(2, "-eofchar")) { + char buf[4] = ""; + if (len == 0) { + Tcl_DStringAppendElement(dsPtr, "-eofchar"); + } + if ((flags & TCL_READABLE) && (statePtr->inEofChar != 0)) { + sprintf(buf, "%c", statePtr->inEofChar); + } + if (len > 0) { + Tcl_DStringAppend(dsPtr, buf, TCL_INDEX_NONE); + return TCL_OK; + } + Tcl_DStringAppendElement(dsPtr, buf); + } if (len == 0 || HaveOpt(1, "-profile")) { int profile; const char *profileName; @@ -8022,20 +8036,6 @@ Tcl_GetChannelOption( return TCL_OK; } } - if (len == 0 || HaveOpt(2, "-eofchar")) { - char buf[4] = ""; - if (len == 0) { - Tcl_DStringAppendElement(dsPtr, "-eofchar"); - } - if ((flags & TCL_READABLE) && (statePtr->inEofChar != 0)) { - sprintf(buf, "%c", statePtr->inEofChar); - } - if (len > 0) { - Tcl_DStringAppend(dsPtr, buf, TCL_INDEX_NONE); - return TCL_OK; - } - Tcl_DStringAppendElement(dsPtr, buf); - } if (len == 0 || HaveOpt(1, "-translation")) { if (len == 0) { Tcl_DStringAppendElement(dsPtr, "-translation"); diff --git a/tests/chanio.test b/tests/chanio.test index ee6133e..8534b3b 100644 --- a/tests/chanio.test +++ b/tests/chanio.test @@ -1098,7 +1098,7 @@ test chan-io-7.3 {FilterInputBytes: split up character at EOF} -setup { chan puts -nonewline $f "1234567890123\x82\x4F\x82\x50\x82" chan close $f set f [open $path(test1)] - chan configure $f -encoding shiftjis + chan configure $f -encoding shiftjis -profile tcl8 lappend x [chan gets $f line] $line lappend x [chan tell $f] [testchannel inputbuffered $f] [chan eof $f] lappend x [chan gets $f line] $line diff --git a/tests/io.test b/tests/io.test index a2e4dc3..5fb2415 100644 --- a/tests/io.test +++ b/tests/io.test @@ -1195,7 +1195,7 @@ test io-7.3 {FilterInputBytes: split up character at EOF} {testchannel} { puts -nonewline $f "1234567890123\x82\x4F\x82\x50\x82" close $f set f [open $path(test1)] - fconfigure $f -encoding shiftjis + fconfigure $f -encoding shiftjis -profile tcl8 set x [list [gets $f line] $line] lappend x [tell $f] [testchannel inputbuffered $f] [eof $f] lappend x [gets $f line] $line diff --git a/tests/ioCmd.test b/tests/ioCmd.test index 530c700..7148ad5 100644 --- a/tests/ioCmd.test +++ b/tests/ioCmd.test @@ -244,7 +244,7 @@ test iocmd-8.7 {fconfigure command} -setup { fconfigure $f1 } -cleanup { catch {close $f1} -} -result {-blocking 1 -buffering full -buffersize 4096 -encoding utf-16 -profile strict -eofchar {} -translation lf} +} -result {-blocking 1 -buffering full -buffersize 4096 -encoding utf-16 -eofchar {} -profile strict -translation lf} test iocmd-8.8 {fconfigure command} -setup { file delete $path(test1) set x {} @@ -256,7 +256,7 @@ test iocmd-8.8 {fconfigure command} -setup { lappend x [fconfigure $f1] } -cleanup { catch {close $f1} -} -result {line {-blocking 1 -buffering line -buffersize 3030 -encoding utf-16 -profile tcl8 -eofchar {} -translation lf}} +} -result {line {-blocking 1 -buffering line -buffersize 3030 -encoding utf-16 -eofchar {} -profile tcl8 -translation lf}} test iocmd-8.9 {fconfigure command} -setup { file delete $path(test1) } -body { @@ -266,7 +266,7 @@ test iocmd-8.9 {fconfigure command} -setup { fconfigure $f1 } -cleanup { catch {close $f1} -} -result {-blocking 1 -buffering none -buffersize 4040 -encoding binary -profile strict -eofchar {} -translation lf} +} -result {-blocking 1 -buffering none -buffersize 4040 -encoding binary -eofchar {} -profile strict -translation lf} test iocmd-8.10 {fconfigure command} -returnCodes error -body { fconfigure a b } -result {can not find channel named "a"} @@ -1372,7 +1372,7 @@ test iocmd-25.1 {chan configure, cgetall, standard options} -match glob -body { close $c rename foo {} set res -} -result {{-blocking 1 -buffering full -buffersize 4096 -encoding * -profile * -eofchar {} -translation {auto *}}} +} -result {{-blocking 1 -buffering full -buffersize 4096 -encoding * -eofchar {} -profile * -translation {auto *}}} test iocmd-25.2 {chan configure, cgetall, no options} -match glob -body { set res {} proc foo {args} {oninit cget cgetall; onfinal; track; return ""} @@ -1381,7 +1381,7 @@ test iocmd-25.2 {chan configure, cgetall, no options} -match glob -body { close $c rename foo {} set res -} -result {{cgetall rc*} {-blocking 1 -buffering full -buffersize 4096 -encoding * -profile * -eofchar {} -translation {auto *}}} +} -result {{cgetall rc*} {-blocking 1 -buffering full -buffersize 4096 -encoding * -eofchar {} -profile * -translation {auto *}}} test iocmd-25.3 {chan configure, cgetall, regular result} -match glob -body { set res {} proc foo {args} { @@ -1393,7 +1393,7 @@ test iocmd-25.3 {chan configure, cgetall, regular result} -match glob -body { close $c rename foo {} set res -} -result {{cgetall rc*} {-blocking 1 -buffering full -buffersize 4096 -encoding * -profile * -eofchar {} -translation {auto *} -bar foo -snarf x}} +} -result {{cgetall rc*} {-blocking 1 -buffering full -buffersize 4096 -encoding * -eofchar {} -profile * -translation {auto *} -bar foo -snarf x}} test iocmd-25.4 {chan configure, cgetall, bad result, list of uneven length} -match glob -body { set res {} proc foo {args} { diff --git a/tests/zlib.test b/tests/zlib.test index b343c06..544e6d4 100644 --- a/tests/zlib.test +++ b/tests/zlib.test @@ -292,7 +292,7 @@ test zlib-8.6 {transformation and fconfigure} -setup { } -cleanup { catch {close $fd} removeFile $file -} -result {{-blocking 1 -buffering full -buffersize 4096 -encoding binary -profile strict -eofchar {} -translation lf} {-blocking 1 -buffering full -buffersize 4096 -encoding binary -profile strict -eofchar {} -translation lf -checksum 1 -dictionary {}} {-blocking 1 -buffering full -buffersize 4096 -encoding binary -profile strict -eofchar {} -translation lf}} +} -result {{-blocking 1 -buffering full -buffersize 4096 -encoding binary -eofchar {} -profile strict -translation lf} {-blocking 1 -buffering full -buffersize 4096 -encoding binary -eofchar {} -profile strict -translation lf -checksum 1 -dictionary {}} {-blocking 1 -buffering full -buffersize 4096 -encoding binary -eofchar {} -profile strict -translation lf}} test zlib-8.7 {transformation and fconfigure} -setup { set file [makeFile {} test.gz] set fd [open $file wb] @@ -302,7 +302,7 @@ test zlib-8.7 {transformation and fconfigure} -setup { } -cleanup { catch {close $fd} removeFile $file -} -result {{-blocking 1 -buffering full -buffersize 4096 -encoding binary -profile strict -eofchar {} -translation lf} {-blocking 1 -buffering full -buffersize 4096 -encoding binary -profile strict -eofchar {} -translation lf -checksum 0} {-blocking 1 -buffering full -buffersize 4096 -encoding binary -profile strict -eofchar {} -translation lf}} +} -result {{-blocking 1 -buffering full -buffersize 4096 -encoding binary -eofchar {} -profile strict -translation lf} {-blocking 1 -buffering full -buffersize 4096 -encoding binary -eofchar {} -profile strict -translation lf -checksum 0} {-blocking 1 -buffering full -buffersize 4096 -encoding binary -eofchar {} -profile strict -translation lf}} # Input is headers from fetching SPDY draft # Dictionary is that which is proposed _in_ SPDY draft set spdyHeaders "HTTP/1.0 200 OK\r\nContent-Type: text/html; charset=utf-8\r\nX-Robots-Tag: noarchive\r\nLast-Modified: Tue, 05 Jun 2012 02:43:25 GMT\r\nETag: \"1338864205129|#public|0|en|||0\"\r\nExpires: Tue, 05 Jun 2012 16:17:11 GMT\r\nDate: Tue, 05 Jun 2012 16:17:06 GMT\r\nCache-Control: public, max-age=5\r\nX-Content-Type-Options: nosniff\r\nX-XSS-Protection: 1; mode=block\r\nServer: GSE\r\n" -- cgit v0.12 From 2ffd05e70358635a831fc16b449e0021c4c00c14 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Thu, 23 Mar 2023 17:31:37 +0000 Subject: documentation update --- doc/Encoding.3 | 2 +- doc/encoding.n | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/doc/Encoding.3 b/doc/Encoding.3 index 1f0dbdf..356f582 100644 --- a/doc/Encoding.3 +++ b/doc/Encoding.3 @@ -590,7 +590,7 @@ with at most one of \fBTCL_ENCODING_PROFILE_TCL8\fR, \fBTCL_ENCODING_PROFILE_STRICT\fR or \fBTCL_ENCODING_PROFILE_REPLACE\fR. These correspond to the \fBtcl8\fR, \fBstrict\fR and \fBreplace\fR profiles respectively. If none are specified, a version-dependent default profile is used. -For Tcl 8.7, the default profile is \fBtcl8\fR. +For Tcl 9.0, the default profile is \fBstrict\fR. .PP For details about profiles, see the \fBPROFILES\fR section in the documentation of the \fBencoding\fR command. diff --git a/doc/encoding.n b/doc/encoding.n index 8ede974..4c37b79 100644 --- a/doc/encoding.n +++ b/doc/encoding.n @@ -120,7 +120,7 @@ Continue further processing of the source data using a fallback strategy such as replacing or discarding the offending bytes in a profile-defined manner. .VE "TCL8.7 TIP656" .PP -The following profiles are currently implemented with \fBtcl8\fR being +The following profiles are currently implemented with \fBstrict\fR being the default if the \fB-profile\fR is not specified. .VS "TCL8.7 TIP656" .TP @@ -146,7 +146,7 @@ the question mark \fB?\fR. \fBstrict\fR . The \fBstrict\fR profile always stops processing when an conversion error is -encountered. The error is signalled via an exception or the \fB-failindex\fR +encountered. The error is signaled via an exception or the \fB-failindex\fR option mechanism. The \fBstrict\fR profile implements a Unicode standard conformant behavior. .TP @@ -206,7 +206,7 @@ unexpected byte sequence starting at index 1: '\ex80' Example 3: Get partial data and the error location: .PP .CS -% codepoints [encoding convertfrom -profile strict -failindex idx ascii AB\ex80] +% codepoints [encoding convertfrom -failindex idx ascii AB\ex80] U+000041 U+000042 % set idx 2 @@ -219,7 +219,7 @@ Example 4: Encode a character that is not representable in ISO8859-1: A? % encoding convertto -profile strict iso8859-1 A\eu0141 unexpected character at index 1: 'U+000141' -% encoding convertto -profile strict -failindex idx iso8859-1 A\eu0141 +% encoding convertto -failindex idx iso8859-1 A\eu0141 A % set idx 1 -- cgit v0.12