From b432766202acec5c8414a7a3409d64807a652564 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Wed, 23 Nov 2022 12:15:24 +0000 Subject: Make "read" throwing a "illegal byte sequence" exception. Doesn't work for "gets" yet. --- generic/tclIO.c | 32 ++++++++++++++++++++++++++------ tests/io.test | 2 +- 2 files changed, 27 insertions(+), 7 deletions(-) diff --git a/generic/tclIO.c b/generic/tclIO.c index 652043c..f5344b3 100644 --- a/generic/tclIO.c +++ b/generic/tclIO.c @@ -397,9 +397,9 @@ ChanClose( * calling Tcl_GetErrno(). * * Side effects: - * The CHANNEL_BLOCKED and CHANNEL_EOF flags of the channel state are set - * as appropriate. On EOF, the inputEncodingFlags are set to perform - * ending operations on decoding. + * The CHANNEL_ENCODING_ERROR, CHANNEL_BLOCKED and CHANNEL_EOF flags + * of the channel state are set as appropriate. On EOF, the + * inputEncodingFlags are set to perform ending operations on decoding. * * TODO - Is this really the right place for that? * @@ -4661,6 +4661,12 @@ Tcl_GetsObj( char *dst, *dstEnd, *eol, *eof; Tcl_EncodingState oldState; + if (GotFlag(statePtr, CHANNEL_ENCODING_ERROR)) { + UpdateInterest(chanPtr); + Tcl_SetErrno(EILSEQ); + return TCL_INDEX_NONE; + } + if (CheckChannelErrors(statePtr, TCL_READABLE) != 0) { return TCL_INDEX_NONE; } @@ -5031,6 +5037,7 @@ Tcl_GetsObj( done: assert(!GotFlag(statePtr, CHANNEL_EOF) || GotFlag(statePtr, CHANNEL_STICKY_EOF) + || GotFlag(statePtr, CHANNEL_ENCODING_ERROR) || Tcl_InputBuffered((Tcl_Channel)chanPtr) == 0); assert(!(GotFlag(statePtr, CHANNEL_EOF|CHANNEL_BLOCKED) == (CHANNEL_EOF|CHANNEL_BLOCKED))); @@ -6016,6 +6023,12 @@ DoReadChars( } } + if (GotFlag(statePtr, CHANNEL_ENCODING_ERROR)) { + /* TODO: We don't need this call? */ + UpdateInterest(chanPtr); + Tcl_SetErrno(EILSEQ); + return -1; + } /* * Early out when next read will see eofchar. * @@ -10108,6 +10121,11 @@ DoRead( * too. Keep on keeping on for now. */ + if (GotFlag(statePtr, CHANNEL_ENCODING_ERROR)) { + UpdateInterest(chanPtr); + Tcl_SetErrno(EILSEQ); + return -1; + } if (GotFlag(statePtr, CHANNEL_STICKY_EOF)) { SetFlag(statePtr, CHANNEL_EOF); assert(statePtr->inputEncodingFlags & TCL_ENCODING_END); @@ -10205,10 +10223,10 @@ DoRead( } /* - * 1) We're @EOF because we saw eof char. + * 1) We're @EOF because we saw eof char, or there was an encoding error. */ - if (GotFlag(statePtr, CHANNEL_STICKY_EOF)) { + if (GotFlag(statePtr, CHANNEL_STICKY_EOF|CHANNEL_ENCODING_ERROR)) { break; } @@ -10293,6 +10311,7 @@ DoRead( assert(!GotFlag(statePtr, CHANNEL_EOF) || GotFlag(statePtr, CHANNEL_STICKY_EOF) + || GotFlag(statePtr, CHANNEL_ENCODING_ERROR) || Tcl_InputBuffered((Tcl_Channel)chanPtr) == 0); assert(!(GotFlag(statePtr, CHANNEL_EOF|CHANNEL_BLOCKED) == (CHANNEL_EOF|CHANNEL_BLOCKED))); @@ -11566,8 +11585,8 @@ DumpFlags( char *str, int flags) { - char buf[20]; int i = 0; + char buf[24]; #define ChanFlag(chr, bit) (buf[i++] = ((flags & (bit)) ? (chr) : '_')) @@ -11580,6 +11599,7 @@ DumpFlags( ChanFlag('c', CHANNEL_CLOSED); ChanFlag('E', CHANNEL_EOF); ChanFlag('S', CHANNEL_STICKY_EOF); + ChanFlag('U', CHANNEL_ENCODING_ERROR); ChanFlag('B', CHANNEL_BLOCKED); ChanFlag('/', INPUT_SAW_CR); ChanFlag('D', CHANNEL_DEAD); diff --git a/tests/io.test b/tests/io.test index ef9e14d..d5f10a5 100644 --- a/tests/io.test +++ b/tests/io.test @@ -9063,7 +9063,7 @@ test io-75.6 {invalid utf-8 encoding read is not ignored (-strictencoding 1)} -s lappend hd $msg } -cleanup { removeFile io-75.6 -} -result "41 0 {}" ; # Here, an exception should be thrown +} -match glob -result {41 1 {error reading "*": illegal byte sequence}} # ### ### ### ######### ######### ######### -- cgit v0.12