summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- doc/OpenFileChnl.3 | 4
-rw-r--r-- generic/tclCmdAH.c | 4
-rw-r--r-- generic/tclIO.c | 103
-rw-r--r-- generic/tclIOCmd.c | 15
-rw-r--r-- generic/tclInt.h | 1
-rw-r--r-- tests/encoding.test | 43
6 files changed, 66 insertions, 104 deletions
diff --git a/doc/OpenFileChnl.3 b/doc/OpenFileChnl.3
index 4f407b6..3a7b6ae 100644
--- a/doc/OpenFileChnl.3
+++ b/doc/OpenFileChnl.3
@@ -482,10 +482,6 @@ end-of-line character. When -1 is returned, the \fBTcl_InputBlocked\fR
procedure may be invoked to determine if the channel is blocked because
of input unavailability.
.PP
-If the channel is in blocking mode, it might be that there is data available
-but - at the same time - an encoding error occurred. In that case, the
-POSIX error EILSEQ will be recorded.
-.PP
\fBTcl_Gets\fR is the same as \fBTcl_GetsObj\fR except the resulting
characters are appended to the dynamic string given by
\fIlineRead\fR rather than a Tcl value.
diff --git a/generic/tclCmdAH.c b/generic/tclCmdAH.c
index e8eb26a..ae1ba33 100644
--- a/generic/tclCmdAH.c
+++ b/generic/tclCmdAH.c
@@ -435,11 +435,7 @@ EncodingConvertParseOptions (
Tcl_Encoding encoding;
Tcl_Obj *dataObj;
Tcl_Obj *failVarObj;
-#if TCL_MAJOR_VERSION > 8 || defined(TCL_NO_DEPRECATED)
int profile = TCL_ENCODING_PROFILE_STRICT;
-#else
- int profile = TCL_ENCODING_PROFILE_TCL8;
-#endif
/*
* Possible combinations:
diff --git a/generic/tclIO.c b/generic/tclIO.c
index b7282c9..fb399d4 100644
--- a/generic/tclIO.c
+++ b/generic/tclIO.c
@@ -223,8 +223,8 @@ static void StopCopy(CopyState *csPtr);
static void TranslateInputEOL(ChannelState *statePtr, char *dst,
const char *src, int *dstLenPtr, int *srcLenPtr);
static void UpdateInterest(Channel *chanPtr);
-static int Write(Channel *chanPtr, const char *src,
- int srcLen, Tcl_Encoding encoding);
+static Tcl_Size Write(Channel *chanPtr, const char *src,
+ Tcl_Size srcLen, Tcl_Encoding encoding);
static Tcl_Obj * FixLevelCode(Tcl_Obj *msg);
static void SpliceChannel(Tcl_Channel chan);
static void CutChannel(Tcl_Channel chan);
@@ -4189,6 +4189,7 @@ Tcl_WriteChars(
}
objPtr = Tcl_NewStringObj(src, len);
+ Tcl_IncrRefCount(objPtr);
src = (char *) Tcl_GetByteArrayFromObj(objPtr, &len);
if (src == NULL) {
Tcl_SetErrno(EILSEQ);
@@ -4237,7 +4238,7 @@ Tcl_WriteObj(
Channel *chanPtr;
ChannelState *statePtr; /* State info for channel */
const char *src;
- Tcl_Size srcLen;
+ Tcl_Size srcLen = 0;
statePtr = ((Channel *) chan)->state;
chanPtr = statePtr->topChanPtr;
@@ -4246,31 +4247,20 @@ Tcl_WriteObj(
return TCL_INDEX_NONE;
}
if (statePtr->encoding == NULL) {
+ Tcl_Size result;
+
src = (char *) Tcl_GetByteArrayFromObj(objPtr, &srcLen);
if (src == NULL) {
Tcl_SetErrno(EILSEQ);
- return TCL_INDEX_NONE;
+ result = TCL_INDEX_NONE;
+ } else {
+ result = WriteBytes(chanPtr, src, srcLen);
}
+ return result;
} else {
src = Tcl_GetStringFromObj(objPtr, &srcLen);
+ return WriteChars(chanPtr, src, srcLen);
}
-
- size_t totalWritten = 0;
- /*
- * Note original code always called WriteChars even if srcLen 0
- * so we will too.
- */
- do {
- int chunkSize = srcLen > INT_MAX ? INT_MAX : srcLen;
- int written;
- written = WriteChars(chanPtr, src, chunkSize);
- if (written < 0) {
- return TCL_INDEX_NONE;
- }
- totalWritten += written;
- srcLen -= chunkSize;
- } while (srcLen);
- return totalWritten;
}
static void
@@ -4341,17 +4331,18 @@ WillRead(
*----------------------------------------------------------------------
*/
-static int
+static Tcl_Size
Write(
Channel *chanPtr, /* The channel to buffer output for. */
const char *src, /* UTF-8 string to write. */
- int srcLen, /* Length of UTF-8 string in bytes. */
+ Tcl_Size srcLen, /* Length of UTF-8 string in bytes. */
Tcl_Encoding encoding)
{
ChannelState *statePtr = chanPtr->state;
/* State info for channel */
char *nextNewLine = NULL;
- int endEncoding, saved = 0, total = 0, flushed = 0, needNlFlush = 0;
+ int endEncoding, needNlFlush = 0;
+ Tcl_Size saved = 0, total = 0, flushed = 0;
char safe[BUFFER_PADDING];
int encodingError = 0;
@@ -4364,7 +4355,6 @@ Write(
*/
endEncoding = ((statePtr->outputEncodingFlags & TCL_ENCODING_END) != 0);
-
if (GotFlag(statePtr, CHANNEL_LINEBUFFERED)
|| (statePtr->outputTranslation != TCL_TRANSLATE_LF)) {
nextNewLine = (char *)memchr(src, '\n', srcLen);
@@ -4373,7 +4363,8 @@ Write(
while (srcLen + saved + endEncoding > 0 && !encodingError) {
ChannelBuffer *bufPtr;
char *dst;
- int result, srcRead, dstLen, dstWrote, srcLimit = srcLen;
+ int result, srcRead, dstLen, dstWrote;
+ Tcl_Size srcLimit = srcLen;
if (nextNewLine) {
srcLimit = nextNewLine - src;
@@ -4604,8 +4595,8 @@ Tcl_GetsObj(
ChannelState *statePtr = chanPtr->state;
/* State info for channel */
ChannelBuffer *bufPtr;
- int inEofChar, skip, copiedTotal, oldFlags;
- Tcl_Size oldLength, oldRemoved;
+ int inEofChar, skip, copiedTotal, oldFlags, oldRemoved;
+ Tcl_Size oldLength;
Tcl_Encoding encoding;
char *dst, *dstEnd, *eol, *eof;
Tcl_EncodingState oldState;
@@ -4995,13 +4986,11 @@ Tcl_GetsObj(
UpdateInterest(chanPtr);
TclChannelRelease((Tcl_Channel)chanPtr);
if (GotFlag(statePtr, CHANNEL_ENCODING_ERROR) && gs.bytesWrote == 0) {
- if (bufPtr->nextRemoved != oldRemoved) {
- bufPtr->nextRemoved = oldRemoved;
- ResetFlag(statePtr, CHANNEL_ENCODING_ERROR);
- }
+ bufPtr->nextRemoved = oldRemoved;
Tcl_SetErrno(EILSEQ);
copiedTotal = -1;
}
+ ResetFlag(statePtr, CHANNEL_ENCODING_ERROR);
return copiedTotal;
}
@@ -5463,7 +5452,8 @@ FilterInputBytes(
if (result == TCL_CONVERT_UNKNOWN || result == TCL_CONVERT_SYNTAX) {
SetFlag(statePtr, CHANNEL_ENCODING_ERROR);
- ResetFlag(statePtr, CHANNEL_EOF|CHANNEL_STICKY_EOF);
+ ResetFlag(statePtr, CHANNEL_STICKY_EOF);
+ ResetFlag(statePtr, CHANNEL_EOF);
result = TCL_OK;
}
@@ -5931,14 +5921,15 @@ DoReadChars(
/* State info for channel */
ChannelBuffer *bufPtr;
Tcl_Size copied;
- int result, copiedNow;
+ int result;
Tcl_Encoding encoding = statePtr->encoding;
int binaryMode;
#define UTF_EXPANSION_FACTOR 1024
int factor = UTF_EXPANSION_FACTOR;
if (GotFlag(statePtr, CHANNEL_ENCODING_ERROR)) {
- /* TODO: We don't need this call? */
+ ResetFlag(statePtr, CHANNEL_EOF|CHANNEL_ENCODING_ERROR);
+ /* TODO: UpdateInterest not needed here? */
UpdateInterest(chanPtr);
Tcl_SetErrno(EILSEQ);
return -1;
@@ -5955,7 +5946,7 @@ DoReadChars(
assert(statePtr->inputEncodingFlags & TCL_ENCODING_END);
assert(!GotFlag(statePtr, CHANNEL_BLOCKED|INPUT_SAW_CR));
- /* TODO: We don't need this call? */
+ /* TODO: UpdateInterest not needed here? */
UpdateInterest(chanPtr);
return 0;
}
@@ -5969,7 +5960,7 @@ DoReadChars(
}
ResetFlag(statePtr, CHANNEL_BLOCKED|CHANNEL_EOF);
statePtr->inputEncodingFlags &= ~TCL_ENCODING_END;
- /* TODO: We don't need this call? */
+ /* TODO: UpdateInterest not needed here? */
UpdateInterest(chanPtr);
return 0;
}
@@ -6007,7 +5998,7 @@ DoReadChars(
ResetFlag(statePtr, CHANNEL_BLOCKED|CHANNEL_EOF);
statePtr->inputEncodingFlags &= ~TCL_ENCODING_END;
for (copied = 0; toRead > 0 || toRead == TCL_INDEX_NONE; ) {
- copiedNow = -1;
+ int copiedNow = -1;
if (statePtr->inQueueHead != NULL) {
if (binaryMode) {
copiedNow = ReadBytes(statePtr, objPtr, toRead);
@@ -6016,7 +6007,7 @@ DoReadChars(
}
/*
- * If the current buffer is empty recycle it.
+ * Recycle current buffer if empty.
*/
bufPtr = statePtr->inQueueHead;
@@ -6114,10 +6105,9 @@ finish:
* successfully read before the error. Return an error so that callers
* like [read] can also return an error.
*/
+ ResetFlag(statePtr, CHANNEL_EOF|CHANNEL_ENCODING_ERROR);
Tcl_SetErrno(EILSEQ);
- if (!copied) {
- copied = -1;
- }
+ copied = -1;
}
TclChannelRelease((Tcl_Channel)chanPtr);
return copied;
@@ -7608,32 +7598,6 @@ Tcl_InputBuffered(
return bytesBuffered;
}
-
-/*
- *----------------------------------------------------------------------
- *
- * TclInputEncodingError --
- *
- * Returns 1 if input is in an encoding error position, 0 otherwise.
- *
- * Results:
- * 0 or 1, always.
- *
- * Side effects:
- * None.
- *
- *----------------------------------------------------------------------
- */
-
-int
-TclInputEncodingError(
- Tcl_Channel chan) /* Is this channel blocked? */
-{
- ChannelState *statePtr = ((Channel *) chan)->state;
- /* State of real channel structure. */
-
- return GotFlag(statePtr, CHANNEL_ENCODING_ERROR) ? 1 : 0;
-}
/*
*----------------------------------------------------------------------
@@ -10014,8 +9978,7 @@ CopyData(
* - EOF is reached on the channel; or
* - the channel is non-blocking, and we've read all we can
* without blocking.
- * - a channel reading error occurs (and we return TCL_INDEX_NONE
- * or - in case of encoding error - the data so far)
+ * - a channel reading error occurs (and we return TCL_INDEX_NONE)
*
* Side effects:
* May cause input to be buffered.
diff --git a/generic/tclIOCmd.c b/generic/tclIOCmd.c
index 4cf4631..93c50ec 100644
--- a/generic/tclIOCmd.c
+++ b/generic/tclIOCmd.c
@@ -304,7 +304,7 @@ Tcl_GetsObjCmd(
TclChannelPreserve(chan);
TclNewObj(linePtr);
lineLen = Tcl_GetsObj(chan, linePtr);
- if (lineLen == TCL_INDEX_NONE) {
+ if (lineLen == TCL_IO_FAILURE) {
if (!Tcl_Eof(chan) && !Tcl_InputBlocked(chan)) {
Tcl_DecrRefCount(linePtr);
@@ -323,7 +323,7 @@ Tcl_GetsObjCmd(
code = TCL_ERROR;
goto done;
}
- lineLen = TCL_INDEX_NONE;
+ lineLen = TCL_IO_FAILURE;
}
if (objc == 3) {
if (Tcl_ObjSetVar2(interp, objv[2], NULL, linePtr,
@@ -432,7 +432,7 @@ Tcl_ReadObjCmd(
TclNewObj(resultPtr);
TclChannelPreserve(chan);
charactersRead = Tcl_ReadChars(chan, resultPtr, toRead, 0);
- if (charactersRead == TCL_INDEX_NONE) {
+ if (charactersRead == TCL_IO_FAILURE) {
Tcl_DecrRefCount(resultPtr);
/*
* TIP #219.
@@ -446,15 +446,6 @@ Tcl_ReadObjCmd(
"error reading \"%s\": %s",
TclGetString(chanObjPtr), Tcl_PosixError(interp)));
}
- goto readError;
- } else if (TclInputEncodingError(chan)) {
- Tcl_Obj *returnOpts = Tcl_NewDictObj();
- Tcl_DictObjPut(NULL, returnOpts, Tcl_NewStringObj("-data", TCL_INDEX_NONE), resultPtr);
- Tcl_SetObjResult(interp, Tcl_ObjPrintf(
- "error reading \"%s\": %s",
- TclGetString(chanObjPtr), Tcl_PosixError(interp)));
- Tcl_SetReturnOptions(interp, returnOpts);
- readError:
TclChannelRelease(chan);
return TCL_ERROR;
}
diff --git a/generic/tclInt.h b/generic/tclInt.h
index 03d3e22..436384e 100644
--- a/generic/tclInt.h
+++ b/generic/tclInt.h
@@ -3224,7 +3224,6 @@ MODULE_SCOPE void TclInitNamespaceSubsystem(void);
MODULE_SCOPE void TclInitNotifier(void);
MODULE_SCOPE void TclInitObjSubsystem(void);
MODULE_SCOPE int TclInterpReady(Tcl_Interp *interp);
-MODULE_SCOPE int TclInputEncodingError(Tcl_Channel chan);
MODULE_SCOPE int TclIsDigitProc(int byte);
MODULE_SCOPE int TclIsBareword(int byte);
MODULE_SCOPE Tcl_Obj * TclJoinPath(Tcl_Size elements, Tcl_Obj * const objv[],
diff --git a/tests/encoding.test b/tests/encoding.test
index 17bf6f5..506ab2c 100644
--- a/tests/encoding.test
+++ b/tests/encoding.test
@@ -464,7 +464,10 @@ test encoding-15.24 {UtfToUtfProc CESU-8 bug [048dd20b4171c8da]} {
test encoding-15.25 {UtfToUtfProc CESU-8} {
encoding convertfrom cesu-8 \x00
} \x00
-test encoding-15.26 {UtfToUtfProc CESU-8} {
+test {encoding-15.26 cesu-8 tclnull strict} {UtfToUtfProc CESU-8} {
+ encoding convertfrom -profile tcl8 cesu-8 \xC0\x80
+} \x00
+test {encoding-15.26 cesu-8 tclnull tcl8} {UtfToUtfProc CESU-8} {
encoding convertfrom -profile tcl8 cesu-8 \xC0\x80
} \x00
test encoding-15.27 {UtfToUtfProc -profile strict CESU-8} {
@@ -562,24 +565,35 @@ test encoding-16.18 {
return done
} [namespace current]]
} -result done
-test encoding-16.19 {Utf16ToUtfProc, bug [d19fe0a5b]} -body {
+test {encoding-16.19 strict} {Utf16ToUtfProc, bug [d19fe0a5b]} -body {
+ encoding convertfrom -profile strict utf-16 "\x41\x41\x41"
+} -returnCodes 1 -result {unexpected byte sequence starting at index 2: '\x41'}
+test {encoding-16.19 tcl8} {Utf16ToUtfProc, bug [d19fe0a5b]} -body {
encoding convertfrom -profile tcl8 utf-16 "\x41\x41\x41"
} -result \u4141\uFFFD
-test encoding-16.20 {Utf16ToUtfProc, bug [d19fe0a5b]} -constraints deprecated -body {
+test encoding-16.20 {utf16ToUtfProc, bug [d19fe0a5b]} \
+ -constraints deprecated -body {
encoding convertfrom utf-16 "\xD8\xD8"
} -result \uD8D8
-test encoding-16.21 {Utf32ToUtfProc, bug [d19fe0a5b]} -body {
+test encoding-16.21.tcl8 {Utf32ToUtfProc, bug [d19fe0a5b]} -body {
encoding convertfrom -profile tcl8 utf-32 "\x00\x00\x00\x00\x41\x41"
} -result \x00\uFFFD
+test encoding-16.21.strict {Utf32ToUtfProc, bug [d19fe0a5b]} -body {
+ encoding convertfrom -profile strict utf-32 "\x00\x00\x00\x00\x41\x41"
+} -returnCodes 1 -result {unexpected byte sequence starting at index 4: '\x41'}
+
test encoding-16.22 {Utf16ToUtfProc, strict, bug [db7a085bd9]} -body {
encoding convertfrom -profile strict utf-16le \x00\xD8
} -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\x00'}
test encoding-16.23 {Utf16ToUtfProc, strict, bug [db7a085bd9]} -body {
encoding convertfrom -profile strict utf-16le \x00\xDC
} -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\x00'}
-test encoding-16.24 {Utf32ToUtfProc} -body {
- encoding convertfrom -profile tcl8 utf-32 "\xFF\xFF\xFF\xFF"
-} -result \uFFFD
+test {encoding-24.4 utf-8 invalid strict} {Parse invalid utf-8, strict} -body {
+ string length [encoding convertfrom -profile strict utf-8 "\xC0\x80"]
+} -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xC0'}
+test {encoding-24.4 utf-8 invalid tcl8} {UtfToUtfProc utf-8} {
+ encoding convertfrom -profile tcl8 utf-8 \xC0\x80
+} \x00
test encoding-16.25 {Utf32ToUtfProc} -body {
encoding convertfrom -profile tcl8 utf-32 "\x01\x00\x00\x01"
} -result \uFFFD
@@ -789,16 +803,19 @@ test encoding-24.10 {Parse valid or invalid utf-8} {
test encoding-24.11 {Parse valid or invalid utf-8} {
string length [encoding convertfrom -profile tcl8 utf-8 "\xEF\xBF\xBF"]
} 1
-test encoding-24.12 {Parse valid or invalid utf-8} -body {
+test encoding-24.12 {Parse invalid utf-8} -body {
encoding convertfrom -profile strict utf-8 "\xC0\x81"
} -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xC0'}
-test encoding-24.13 {Parse valid or invalid utf-8} -body {
+test encoding-24.13 {Parse invalid utf-8} -body {
encoding convertfrom -profile strict utf-8 "\xC1\xBF"
} -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xC1'}
-test encoding-24.14 {Parse valid or invalid utf-8} {
- string length [encoding convertfrom utf-8 "\xC2\x80"]
+test encoding-24.14 {Parse valid utf-8} {
+ expr {[encoding convertfrom utf-8 "\xC2\x80"] eq "\u80"}
} 1
-test encoding-24.15 {Parse valid or invalid utf-8} -body {
+test encoding-24.15.strict {Parse invalid utf-8, -profile strict} -body {
+ encoding convertfrom -profile strict utf-8 "Z\xE0\x80"
+} -returnCodes 1 -result "unexpected byte sequence starting at index 1: '\\xE0'"
+test encoding-24.15.tcl8 {Parse invalid utf-8, -profile tcl8} -body {
encoding convertfrom -profile tcl8 utf-8 "Z\xE0\x80"
} -result Z\xE0\u20AC
test encoding-24.16 {Parse valid or invalid utf-8} -constraints testbytestring -body {
@@ -855,7 +872,7 @@ test encoding-24.31 {Parse invalid utf-8 with -profile tcl8} -body {
test encoding-24.32 {Try to generate invalid utf-8} -body {
encoding convertto utf-8 \uFFFF
} -result \xEF\xBF\xBF
-test encoding-24.33 {Try to generate noncharacter with -profile strict} -body {
+test encoding-24.33 {Try to generate invalid utf-8} -body {
encoding convertto -profile strict utf-8 \uFFFF
} -result \xEF\xBF\xBF
test encoding-24.34 {Try to generate invalid utf-8 with -profile tcl8} -body {