diff options
| -rw-r--r-- | generic/tcl.h | 56 | ||||
| -rw-r--r-- | generic/tclEncoding.c | 12 | ||||
| -rw-r--r-- | generic/tclIO.c | 4 | ||||
| -rw-r--r-- | generic/tclIOCmd.c | 1 | ||||
| -rw-r--r-- | tests/encoding.test | 48 | ||||
| -rw-r--r-- | tests/io.test | 57 | ||||
| -rw-r--r-- | tests/ioCmd.test | 4 | ||||
| -rw-r--r-- | tools/regexpTestLib.tcl | 2 |
8 files changed, 108 insertions, 76 deletions
diff --git a/generic/tcl.h b/generic/tcl.h index 874d75f..2b53925 100644 --- a/generic/tcl.h +++ b/generic/tcl.h @@ -306,16 +306,30 @@ typedef unsigned TCL_WIDE_INT_TYPE Tcl_WideUInt; # define TCL_Z_MODIFIER "" # endif #endif /* !TCL_Z_MODIFIER */ +#ifndef TCL_T_MODIFIER +# if defined(__GNUC__) && !defined(_WIN32) +# define TCL_T_MODIFIER "t" +# elif defined(_WIN64) +# define TCL_T_MODIFIER TCL_LL_MODIFIER +# else +# define TCL_T_MODIFIER TCL_Z_MODIFIER +# endif +#endif /* !TCL_T_MODIFIER */ + #define Tcl_WideAsLong(val) ((long)((Tcl_WideInt)(val))) #define Tcl_LongAsWide(val) ((Tcl_WideInt)((long)(val))) #define Tcl_WideAsDouble(val) ((double)((Tcl_WideInt)(val))) #define Tcl_DoubleAsWide(val) ((Tcl_WideInt)((double)(val))) #if TCL_MAJOR_VERSION < 9 -typedef int Tcl_Size; + typedef int Tcl_Size; +# define TCL_SIZE_MODIFIER "" +# define TCL_SIZE_MAX INT_MAX #else -typedef size_t Tcl_Size; -#endif + typedef size_t Tcl_Size; +# define TCL_SIZE_MAX PTRDIFF_MAX +# define TCL_SIZE_MODIFIER TCL_T_MODIFIER +#endif /* TCL_MAJOR_VERSION */ #ifdef _WIN32 # if TCL_MAJOR_VERSION > 8 || defined(_WIN64) || defined(_USE_64BIT_TIME_T) @@ -450,38 +464,30 @@ typedef void (Tcl_ThreadCreateProc) (void *clientData); * string. */ -#if TCL_MAJOR_VERSION > 8 typedef struct Tcl_RegExpIndices { +#if TCL_MAJOR_VERSION > 8 Tcl_Size start; /* Character offset of first character in * match. */ Tcl_Size end; /* Character offset of first character after * the match. */ +#else + long start; + long end; +#endif } Tcl_RegExpIndices; typedef struct Tcl_RegExpInfo { Tcl_Size nsubs; /* Number of subexpressions in the compiled * expression. */ Tcl_RegExpIndices *matches; /* Array of nsubs match offset pairs. */ +#if TCL_MAJOR_VERSION > 8 Tcl_Size extendStart; /* The offset at which a subsequent match * might begin. */ -} Tcl_RegExpInfo; #else -typedef struct Tcl_RegExpIndices { - long start; /* Character offset of first character in - * match. */ - long end; /* Character offset of first character after - * the match. */ -} Tcl_RegExpIndices; - -typedef struct Tcl_RegExpInfo { - int nsubs; /* Number of subexpressions in the compiled - * expression. */ - Tcl_RegExpIndices *matches; /* Array of nsubs match offset pairs. */ - long extendStart; /* The offset at which a subsequent match - * might begin. */ + long extendStart; long reserved; /* Reserved for later use. */ -} Tcl_RegExpInfo; #endif +} Tcl_RegExpInfo; /* * Picky compilers complain if this typdef doesn't appear before the struct's @@ -1774,7 +1780,7 @@ typedef struct Tcl_Token { * TCL_TOKEN_OPERATOR - The token describes one expression operator. * An operator might be the name of a math * function such as "abs". A TCL_TOKEN_OPERATOR - * token is always preceeded by one + * token is always preceded by one * TCL_TOKEN_SUB_EXPR token for the operator's * subexpression, and is followed by zero or more * TCL_TOKEN_SUB_EXPR tokens for the operator's @@ -2014,11 +2020,11 @@ typedef struct Tcl_EncodingType { */ #ifndef TCL_UTF_MAX -#if TCL_MAJOR_VERSION > 8 -#define TCL_UTF_MAX 4 -#else -#define TCL_UTF_MAX 3 -#endif +# if TCL_MAJOR_VERSION > 8 +# define TCL_UTF_MAX 4 +# else +# define TCL_UTF_MAX 3 +# endif #endif /* diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 6feb686..9b1894e 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -610,7 +610,7 @@ TclInitEncodingSubsystem(void) type.nullSize = 1; type.clientData = INT2PTR(ENCODING_UTF); Tcl_CreateEncoding(&type); - type.clientData = INT2PTR(0); + type.clientData = NULL; type.encodingName = "cesu-8"; Tcl_CreateEncoding(&type); @@ -622,7 +622,7 @@ TclInitEncodingSubsystem(void) type.clientData = INT2PTR(TCL_ENCODING_LE); Tcl_CreateEncoding(&type); type.encodingName = "ucs-2be"; - type.clientData = INT2PTR(0); + type.clientData = NULL; Tcl_CreateEncoding(&type); type.encodingName = "ucs-2"; type.clientData = INT2PTR(leFlags); @@ -636,7 +636,7 @@ TclInitEncodingSubsystem(void) type.clientData = INT2PTR(TCL_ENCODING_LE); Tcl_CreateEncoding(&type); type.encodingName = "utf-32be"; - type.clientData = INT2PTR(0); + type.clientData = NULL; Tcl_CreateEncoding(&type); type.encodingName = "utf-32"; type.clientData = INT2PTR(leFlags); @@ -647,13 +647,13 @@ TclInitEncodingSubsystem(void) type.freeProc = NULL; type.nullSize = 2; type.encodingName = "utf-16le"; - type.clientData = INT2PTR(TCL_ENCODING_LE|ENCODING_UTF); + type.clientData = INT2PTR(TCL_ENCODING_LE); Tcl_CreateEncoding(&type); type.encodingName = "utf-16be"; - type.clientData = INT2PTR(ENCODING_UTF); + type.clientData = NULL; Tcl_CreateEncoding(&type); type.encodingName = "utf-16"; - type.clientData = INT2PTR(leFlags|ENCODING_UTF); + type.clientData = INT2PTR(leFlags); Tcl_CreateEncoding(&type); #ifndef TCL_NO_DEPRECATED diff --git a/generic/tclIO.c b/generic/tclIO.c index 4a88f52..43be5d3 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -4616,7 +4616,7 @@ Tcl_GetsObj( if (GotFlag(statePtr, CHANNEL_ENCODING_ERROR)) { UpdateInterest(chanPtr); - ResetFlag(statePtr, CHANNEL_ENCODING_ERROR); + ResetFlag(statePtr, CHANNEL_EOF|CHANNEL_ENCODING_ERROR); Tcl_SetErrno(EILSEQ); return TCL_INDEX_NONE; } @@ -5941,6 +5941,7 @@ DoReadChars( int factor = UTF_EXPANSION_FACTOR; if (GotFlag(statePtr, CHANNEL_ENCODING_ERROR)) { + ResetFlag(statePtr, CHANNEL_EOF|CHANNEL_STICKY_EOF|CHANNEL_BLOCKED); /* TODO: We don't need this call? */ UpdateInterest(chanPtr); Tcl_SetErrno(EILSEQ); @@ -6117,6 +6118,7 @@ finish: * succesfully red before the error. Return an error so that callers * like [read] can also return an error. */ + ResetFlag(statePtr, CHANNEL_EOF|CHANNEL_ENCODING_ERROR); Tcl_SetErrno(EILSEQ); copied = -1; } diff --git a/generic/tclIOCmd.c b/generic/tclIOCmd.c index 5a766c6..7411855 100644 --- a/generic/tclIOCmd.c +++ b/generic/tclIOCmd.c @@ -433,6 +433,7 @@ Tcl_ReadObjCmd( TclChannelPreserve(chan); charactersRead = Tcl_ReadChars(chan, resultPtr, toRead, 0); if (charactersRead == TCL_IO_FAILURE) { + Tcl_DecrRefCount(resultPtr); /* * TIP #219. * Capture error messages put by the driver into the bypass area and diff --git a/tests/encoding.test b/tests/encoding.test index 35340a6..09f3e42 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -465,7 +465,7 @@ test encoding-15.25 {UtfToUtfProc CESU-8} { encoding convertfrom cesu-8 \x00 } \x00 test encoding-15.26 {UtfToUtfProc CESU-8} { - encoding convertfrom cesu-8 \xC0\x80 + encoding convertfrom -profile tcl8 cesu-8 \xC0\x80 } \x00 test encoding-15.27 {UtfToUtfProc -profile strict CESU-8} { encoding convertfrom -profile strict cesu-8 \x00 @@ -492,7 +492,7 @@ test encoding-16.2 {Utf16ToUtfProc} -body { list $val [format %x [scan $val %c]] } -result "\U460DC 460dc" test encoding-16.3 {Utf16ToUtfProc} -body { - set val [encoding convertfrom utf-16 "\xDC\xDC"] + set val [encoding convertfrom -profile tcl8 utf-16 "\xDC\xDC"] list $val [format %x [scan $val %c]] } -result "\uDCDC dcdc" test encoding-16.4 {Ucs2ToUtfProc} -body { @@ -504,11 +504,11 @@ test encoding-16.5 {Ucs2ToUtfProc} -body { list $val [format %x [scan $val %c]] } -result "\U460DC 460dc" test encoding-16.6 {Utf32ToUtfProc} -body { - set val [encoding convertfrom utf-32le NN\0\0] + set val [encoding convertfrom -profile strict utf-32le NN\0\0] list $val [format %x [scan $val %c]] } -result "乎 4e4e" test encoding-16.7 {Utf32ToUtfProc} -body { - set val [encoding convertfrom utf-32be \0\0NN] + set val [encoding convertfrom -profile strict utf-32be \0\0NN] list $val [format %x [scan $val %c]] } -result "乎 4e4e" test encoding-16.8 {Utf32ToUtfProc} -body { @@ -516,28 +516,28 @@ test encoding-16.8 {Utf32ToUtfProc} -body { list $val [format %x [scan $val %c]] } -result "\uFFFD fffd" test encoding-16.9 {Utf32ToUtfProc} -constraints utf32 -body { - encoding convertfrom utf-32le \x00\xD8\x00\x00 + encoding convertfrom -profile tcl8 utf-32le \x00\xD8\x00\x00 } -result \uD800 test encoding-16.10 {Utf32ToUtfProc} -body { - encoding convertfrom utf-32le \x00\xDC\x00\x00 + encoding convertfrom -profile tcl8 utf-32le \x00\xDC\x00\x00 } -result \uDC00 test encoding-16.11 {Utf32ToUtfProc} -body { - encoding convertfrom utf-32le \x00\xD8\x00\x00\x00\xDC\x00\x00 + encoding convertfrom -profile tcl8 utf-32le \x00\xD8\x00\x00\x00\xDC\x00\x00 } -result \uD800\uDC00 test encoding-16.12 {Utf32ToUtfProc} -constraints utf32 -body { - encoding convertfrom utf-32le \x00\xDC\x00\x00\x00\xD8\x00\x00 + encoding convertfrom -profile tcl8 utf-32le \x00\xDC\x00\x00\x00\xD8\x00\x00 } -result \uDC00\uD800 test encoding-16.13 {Utf16ToUtfProc} -body { - encoding convertfrom utf-16le \x00\xD8 + encoding convertfrom -profile tcl8 utf-16le \x00\xD8 } -result \uD800 test encoding-16.14 {Utf16ToUtfProc} -body { - encoding convertfrom utf-16le \x00\xDC + encoding convertfrom -profile tcl8 utf-16le \x00\xDC } -result \uDC00 test encoding-16.15 {Utf16ToUtfProc} -body { encoding convertfrom utf-16le \x00\xD8\x00\xDC } -result \U010000 test encoding-16.16 {Utf16ToUtfProc} -body { - encoding convertfrom utf-16le \x00\xDC\x00\xD8 + encoding convertfrom -profile tcl8 utf-16le \x00\xDC\x00\xD8 } -result \uDC00\uD800 test encoding-16.17 {Utf32ToUtfProc} -body { list [encoding convertfrom -profile strict -failindex idx utf-32le \x41\x00\x00\x00\x00\xD8\x00\x00\x42\x00\x00\x00] [set idx] @@ -563,13 +563,13 @@ test encoding-16.18 { } [namespace current]] } -result done test encoding-16.19 {Utf16ToUtfProc, bug [d19fe0a5b]} -body { - encoding convertfrom utf-16 "\x41\x41\x41" + encoding convertfrom -profile tcl8 utf-16 "\x41\x41\x41" } -result \u4141\uFFFD test encoding-16.20 {Utf16ToUtfProc, bug [d19fe0a5b]} -constraints deprecated -body { encoding convertfrom utf-16 "\xD8\xD8" } -result \uD8D8 -test encoding-16.21 {Utf16ToUtfProc, bug [d19fe0a5b]} -body { - encoding convertfrom utf-32 "\x00\x00\x00\x00\x41\x41" +test encoding-16.21 {Utf32ToUtfProc, bug [d19fe0a5b]} -body { + encoding convertfrom -profile tcl8 utf-32 "\x00\x00\x00\x00\x41\x41" } -result \x00\uFFFD test encoding-16.22 {Utf16ToUtfProc, strict, bug [db7a085bd9]} -body { encoding convertfrom -profile strict utf-16le \x00\xD8 @@ -578,10 +578,10 @@ test encoding-16.23 {Utf16ToUtfProc, strict, bug [db7a085bd9]} -body { encoding convertfrom -profile strict utf-16le \x00\xDC } -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\x00'} test encoding-16.24 {Utf32ToUtfProc} -body { - encoding convertfrom utf-32 "\xFF\xFF\xFF\xFF" + encoding convertfrom -profile tcl8 utf-32 "\xFF\xFF\xFF\xFF" } -result \uFFFD test encoding-16.25 {Utf32ToUtfProc} -body { - encoding convertfrom utf-32 "\x01\x00\x00\x01" + encoding convertfrom -profile tcl8 utf-32 "\x01\x00\x00\x01" } -result \uFFFD test encoding-17.1 {UtfToUtf16Proc} -body { @@ -596,10 +596,10 @@ test encoding-17.3 {UtfToUtf16Proc} -body { test encoding-17.4 {UtfToUtf16Proc} -body { encoding convertto -profile tcl8 utf-16le "\uD8D8" } -result "\xD8\xD8" -test encoding-17.5 {UtfToUtf16Proc} -body { +test encoding-17.5 {UtfToUtf32Proc} -body { encoding convertto utf-32le "\U460DC" } -result "\xDC\x60\x04\x00" -test encoding-17.6 {UtfToUtf16Proc} -body { +test encoding-17.6 {UtfToUtf32Proc} -body { encoding convertto utf-32be "\U460DC" } -result "\x00\x04\x60\xDC" test encoding-17.7 {UtfToUtf16Proc} -body { @@ -622,7 +622,7 @@ test encoding-17.12 {Utf32ToUtfProc} -body { } -returnCodes error -result {unexpected byte sequence starting at index 0: '\x00'} test encoding-18.1 {TableToUtfProc on invalid input} -body { - list [catch {encoding convertto jis0208 \\} res] $res + list [catch {encoding convertto -profile tcl8 jis0208 \\} res] $res } -result {0 !)} test encoding-18.2 {TableToUtfProc on invalid input with -profile strict} -body { list [catch {encoding convertto -profile strict jis0208 \\} res] $res @@ -634,14 +634,14 @@ test encoding-18.4 {TableToUtfProc on invalid input with -failindex -profile str list [catch {encoding convertto -failindex pos -profile strict jis0208 \\} res] $res $pos } -result {0 {} 0} test encoding-18.5 {TableToUtfProc on invalid input with -failindex} -body { - list [catch {encoding convertto -failindex pos jis0208 \\} res] $res $pos + list [catch {encoding convertto -profile tcl8 -failindex pos jis0208 \\} res] $res $pos } -result {0 !) -1} test encoding-18.6 {TableToUtfProc on invalid input with -profile tcl8} -body { list [catch {encoding convertto -profile tcl8 jis0208 \\} res] $res } -result {0 !)} test encoding-19.1 {TableFromUtfProc} -body { - encoding convertfrom ascii AÁ + encoding convertfrom -profile tcl8 ascii AÁ } -result AÁ test encoding-19.2 {TableFromUtfProc} -body { encoding convertfrom -profile tcl8 ascii AÁ @@ -650,7 +650,7 @@ test encoding-19.3 {TableFromUtfProc} -body { encoding convertfrom -profile strict ascii AÁ } -returnCodes 1 -result {unexpected byte sequence starting at index 1: '\xC1'} test encoding-19.4 {TableFromUtfProc} -body { - list [encoding convertfrom -failindex idx ascii AÁ] [set idx] + list [encoding convertfrom -profile tcl8 -failindex idx ascii AÁ] [set idx] } -result [list A\xC1 -1] test encoding-19.5 {TableFromUtfProc} -body { list [encoding convertfrom -failindex idx -profile strict ascii A\xC1] [set idx] @@ -799,7 +799,7 @@ test encoding-24.14 {Parse valid or invalid utf-8} { string length [encoding convertfrom utf-8 "\xC2\x80"] } 1 test encoding-24.15 {Parse valid or invalid utf-8} -body { - encoding convertfrom utf-8 "Z\xE0\x80" + encoding convertfrom -profile tcl8 utf-8 "Z\xE0\x80" } -result Z\xE0\u20AC test encoding-24.16 {Parse valid or invalid utf-8} -constraints testbytestring -body { encoding convertto utf-8 [testbytestring "Z\u4343\x80"] @@ -862,7 +862,7 @@ test encoding-24.34 {Try to generate invalid utf-8 with -profile tcl8} -body { encoding convertto -profile tcl8 utf-8 \uFFFF } -result \xEF\xBF\xBF test encoding-24.35 {Parse invalid utf-8} -constraints utf32 -body { - encoding convertfrom utf-8 \xED\xA0\x80 + encoding convertfrom -profile tcl8 utf-8 \xED\xA0\x80 } -result \uD800 test encoding-24.36 {Parse invalid utf-8 with -profile strict} -body { encoding convertfrom -profile strict utf-8 \xED\xA0\x80 diff --git a/tests/io.test b/tests/io.test index d98317a..3ab09e8 100644 --- a/tests/io.test +++ b/tests/io.test @@ -1620,7 +1620,7 @@ test io-12.9 {ReadChars: multibyte chars split} -body { puts -nonewline $f [string repeat a 9]\xC2 close $f set f [open $path(test1)] - fconfigure $f -encoding utf-8 -buffersize 10 + fconfigure $f -encoding utf-8 -profile tcl8 -buffersize 10 set in [read $f] close $f scan [string index $in end] %c @@ -9273,7 +9273,6 @@ test io-75.6 {invalid utf-8 encoding, gets is not ignored (-profile strict)} -se fconfigure $f -encoding utf-8 -buffering none -eofchar {} \ -translation lf -profile strict } -body { -after 1 gets $f } -cleanup { close $f @@ -9323,6 +9322,30 @@ test io-75.8 {invalid utf-8 encoding eof handling (-profile strict)} -setup { removeFile io-75.8 } -result {41 1 {}} +test io-75.8.eoflater {invalid utf-8 encoding eof handling (-profile strict)} -setup { + set res {} + set fn [makeFile {} io-75.8] + set f [open $fn w+] + fconfigure $f -encoding binary + # \x81 is invalid in utf-8. -eofchar is not detected, because it comes later. + puts -nonewline $f A\x81\x1A + flush $f + seek $f 0 + fconfigure $f -encoding utf-8 -buffering none -eofchar \x1A \ + -translation lf -profile strict +} -body { + after 1 + set status [catch {read $f} cres copts] + lappend res $status + lappend res [eof $f] + chan configure $f -encoding iso8859-1 + lappend res [read $f] + close $f + set res +} -cleanup { + removeFile io-75.8 +} -result "1 0 \x81" + test io-75.9 {unrepresentable character write passes and is replaced by ?} -setup { set fn [makeFile {} io-75.9] set f [open $fn w+] @@ -9450,23 +9473,23 @@ test io-75.14 { invalid utf-8 encoding [gets] continues in non-strict mode after error } -setup { - set chan [file tempfile] - fconfigure $f -encoding binary + set chan [file tempfile] + fconfigure $chan -encoding binary # \xc0\n is an invalid utf-8 sequence - puts -nonewline $f a\nb\nc\xc0\nd\n - flush $f - seek $f 0 - fconfigure $f -encoding utf-8 -buffering none -eofchar {} \ - -translation lf -profile strict + puts -nonewline $chan a\nb\nc\xc0\nd\n + flush $chan + seek $chan 0 + fconfigure $chan -encoding utf-8 -buffering none -eofchar {} \ + -translation auto -profile strict } -body { - lappend res [gets $f] - lappend res [gets $f] - set status [catch {gets $f} cres copts] + lappend res [gets $chan] + lappend res [gets $chan] + set status [catch {gets $chan} cres copts] lappend res $status $cres - chan configure $f -profile tcl8 - lappend res [gets $f] - lappend res [gets $f] - close $f + chan configure $chan -profile tcl8 + lappend res [gets $chan] + lappend res [gets $chan] + close $chan return $res } -match glob -result {a b 1 {error reading "*":\ invalid or incomplete multibyte or wide character} cÀ d} @@ -9497,7 +9520,7 @@ test io-75.15 { scan $char %c ord lappend res [format %x $ord] } - fconfigure $chan -encoding utf-8 -profile strict + fconfigure $chan -encoding utf-8 -profile strict -translation auto lappend res [gets $chan] lappend res [gets $chan] return $res diff --git a/tests/ioCmd.test b/tests/ioCmd.test index 4163b1b..1c06ba3 100644 --- a/tests/ioCmd.test +++ b/tests/ioCmd.test @@ -1058,7 +1058,7 @@ test iocmd-23.1 {chan read, regular data return} -match glob -body { rename foo {} set res } -result {{read rc* 4096} {read rc* 4096} snarfsnarf} -test iocmd-23.2 {chan read, bad data return, to much} -match glob -body { +test iocmd-23.2 {chan read, bad data return, too much} -match glob -body { set res {} proc foo {args} { oninit; onfinal; track @@ -2369,7 +2369,7 @@ test iocmd.tf-23.1 {chan read, regular data return} -match glob -body { rename foo {} set res } -constraints {testchannel thread} -result {{read rc* 4096} {read rc* 4096} snarfsnarf} -test iocmd.tf-23.2 {chan read, bad data return, to much} -match glob -body { +test iocmd.tf-23.2 {chan read, bad data return, too much} -match glob -body { set res {} proc foo {args} { oninit; onfinal; track diff --git a/tools/regexpTestLib.tcl b/tools/regexpTestLib.tcl index 2687e67..c5c156e 100644 --- a/tools/regexpTestLib.tcl +++ b/tools/regexpTestLib.tcl @@ -42,7 +42,7 @@ proc readInputFile {} { # # strings with embedded @'s are truncated -# unpreceeded @'s are replaced by {} +# unpreceded @'s are replaced by {} # proc removeAts {ls} { set len [llength $ls] |
