diff options
-rw-r--r-- | doc/Encoding.3 | 16 | ||||
-rw-r--r-- | generic/tcl.h | 17 | ||||
-rw-r--r-- | generic/tclCmdAH.c | 16 | ||||
-rw-r--r-- | generic/tclEncoding.c | 34 | ||||
-rw-r--r-- | generic/tclZipfs.c | 3 | ||||
-rw-r--r-- | tests/chanio.test | 6 |
6 files changed, 33 insertions, 59 deletions
diff --git a/doc/Encoding.3 b/doc/Encoding.3 index 663cd3f..d95ca89 100644 --- a/doc/Encoding.3 +++ b/doc/Encoding.3 @@ -99,13 +99,13 @@ converted. \fBTCL_ENCODING_END\fR signifies that the source buffer is the last block in a (potentially multi-block) input stream, telling the conversion routine to perform any finalization that needs to occur after the last byte is converted and then to reset to an initial state. -\fBTCL_ENCODING_STOPONERROR\fR signifies that the conversion routine should -return immediately upon reading a source character that does not exist in -the target encoding; otherwise a default fallback character will -automatically be substituted. The flag \fBTCL_ENCODING_NOCOMPLAIN\fR has -no effect, it is reserved for Tcl 9.0. The flag \fBTCL_ENCODING_MODIFIED\fR makes -\fBTcl_UtfToExternalDStringEx\fR and \fBTcl_UtfToExternal\fR produce the -byte sequence \exC0\ex80 in stead of \ex00, for the utf-8/cesu-8 encoders. +\fBTCL_ENCODING_NOCOMPLAIN\fR signifies that the conversion routine should +not return immediately upon reading a source character that does not exist in +the target encoding, but it will substitute a default fallback character for +all of such characters. The flag \fBTCL_ENCODING_STOPONERROR\fR has no effect, +it only has meaning in Tcl 8.x. The flag \fBTCL_ENCODING_MODIFIED\fR makes +\fBTcl_UtfToExternalDStringEx\fR and \fBTcl_UtfToExternal\fR produce the byte +sequence \exC0\ex80 in stead of \ex00, for the utf-8/cesu-8 encoders. .AP Tcl_EncodingState *statePtr in/out Used when converting a (generally long or indefinite length) byte stream in a piece-by-piece fashion. The conversion routine stores its current @@ -236,7 +236,7 @@ if the input stream has been damaged or if the input encoding method was misidentified. .IP \fBTCL_CONVERT_UNKNOWN\fR 29 The source buffer contained a character that could not be represented in -the target encoding and \fBTCL_ENCODING_STOPONERROR\fR was specified. 
+the target encoding and \fBTCL_ENCODING_NOCOMPLAIN\fR was not specified. .RE .LP \fBTcl_UtfToExternalDString\fR converts a source buffer \fIsrc\fR from UTF-8 diff --git a/generic/tcl.h b/generic/tcl.h index 2757eff..40ea97d 100644 --- a/generic/tcl.h +++ b/generic/tcl.h @@ -1865,14 +1865,7 @@ typedef struct Tcl_EncodingType { * reset to an initial state. If the source * buffer contains the entire input stream to be * converted, this flag should be set. - * TCL_ENCODING_STOPONERROR - If set, the converter returns immediately upon - * encountering an invalid byte sequence or a - * source character that has no mapping in the - * target encoding. If clear, the converter - * substitutes the problematic character(s) with - * one or more "close" characters in the - * destination buffer and then continues to - * convert the source. Only for Tcl 8.x. + * TCL_ENCODING_STOPONERROR - Not used any more. * TCL_ENCODING_NO_TERMINATE - If set, Tcl_ExternalToUtf does not append a * terminating NUL byte. Since it does not need * an extra byte for a terminating NUL, it fills @@ -1903,7 +1896,7 @@ typedef struct Tcl_EncodingType { #define TCL_ENCODING_START 0x01 #define TCL_ENCODING_END 0x02 -#define TCL_ENCODING_STOPONERROR 0x04 +#define TCL_ENCODING_STOPONERROR 0x0 /* Not used any more */ #define TCL_ENCODING_NO_TERMINATE 0x08 #define TCL_ENCODING_CHAR_LIMIT 0x10 #define TCL_ENCODING_MODIFIED 0x20 @@ -1929,12 +1922,12 @@ typedef struct Tcl_EncodingType { * character sequence. This may occur if the * input stream has been damaged or if the input * encoding method was misidentified. This error - * is reported only if TCL_ENCODING_STOPONERROR + * is reported unless TCL_ENCODING_NOCOMPLAIN * was specified. * TCL_CONVERT_UNKNOWN - The source string contained a character that * could not be represented in the target - * encoding. This error is reported only if - * TCL_ENCODING_STOPONERROR was specified. + * encoding. 
This error is reported unless + * TCL_ENCODING_NOCOMPLAIN was specified. */ #define TCL_CONVERT_MULTIBYTE (-1) diff --git a/generic/tclCmdAH.c b/generic/tclCmdAH.c index 36d9867..fe14ae8 100644 --- a/generic/tclCmdAH.c +++ b/generic/tclCmdAH.c @@ -414,11 +414,7 @@ EncodingConvertfromObjCmd( Tcl_Encoding encoding; /* Encoding to use */ size_t length = 0; /* Length of the byte array being converted */ const char *bytesPtr; /* Pointer to the first byte of the array */ -#if TCL_MAJOR_VERSION > 8 || defined(TCL_NO_DEPRECATED) - int flags = TCL_ENCODING_STOPONERROR; -#else - int flags = TCL_ENCODING_NOCOMPLAIN; -#endif + int flags = 0; size_t result; if (objc == 2) { @@ -459,7 +455,7 @@ encConvFromOK: } result = Tcl_ExternalToUtfDStringEx(encoding, bytesPtr, length, flags, &ds); - if ((flags & TCL_ENCODING_STOPONERROR) && (result != TCL_INDEX_NONE)) { + if (!(flags & TCL_ENCODING_NOCOMPLAIN) && (result != (size_t)-1)) { char buf[TCL_INTEGER_SPACE]; sprintf(buf, "%" TCL_Z_MODIFIER "u", result); Tcl_SetObjResult(interp, Tcl_ObjPrintf("unexpected byte sequence starting at index %" @@ -513,11 +509,7 @@ EncodingConverttoObjCmd( size_t length; /* Length of the string being converted */ const char *stringPtr; /* Pointer to the first byte of the string */ size_t result; -#if TCL_MAJOR_VERSION > 8 || defined(TCL_NO_DEPRECATED) - int flags = TCL_ENCODING_STOPONERROR; -#else - int flags = TCL_ENCODING_NOCOMPLAIN; -#endif + int flags = 0; if (objc == 2) { encoding = Tcl_GetEncoding(interp, NULL); @@ -555,7 +547,7 @@ encConvToOK: stringPtr = Tcl_GetStringFromObj(data, &length); result = Tcl_UtfToExternalDStringEx(encoding, stringPtr, length, flags, &ds); - if ((flags & TCL_ENCODING_STOPONERROR) && (result != TCL_INDEX_NONE)) { + if (!(flags & TCL_ENCODING_NOCOMPLAIN) && (result != (size_t)-1)) { size_t pos = Tcl_NumUtfChars(stringPtr, result); int ucs4; char buf[TCL_INTEGER_SPACE]; diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 7b77282..d735fe3 100644 --- 
a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -1089,11 +1089,9 @@ Tcl_ExternalToUtfDString( * Tcl_ExternalToUtfDStringEx -- * * Convert a source buffer from the specified encoding into UTF-8. -* The parameter flags controls the behavior, if any of the bytes in + * The parameter flags controls the behavior, if any of the bytes in * the source buffer are invalid or cannot be represented in utf-8. * Possible flags values: - * TCL_ENCODING_STOPONERROR: don't replace invalid characters/bytes but - * return the first error position (Default in Tcl 9.0). * TCL_ENCODING_NOCOMPLAIN: replace invalid characters/bytes by a default * fallback character. Always return -1 (Default in Tcl 8.7). * TCL_ENCODING_MODIFIED: convert NULL bytes to \xC0\x80 in stead of 0x00. @@ -1332,8 +1330,6 @@ Tcl_UtfToExternalDString( * the source buffer are invalid or cannot be represented in the * target encoding. * Possible flags values: - * TCL_ENCODING_STOPONERROR: don't replace invalid characters/bytes but - * return the first error position (Default in Tcl 9.0). * TCL_ENCODING_NOCOMPLAIN: replace invalid characters/bytes by a default * fallback character. Always return -1 (Default in Tcl 8.7). * TCL_ENCODING_MODIFIED: convert NULL bytes to \xC0\x80 in stead of 0x00. @@ -2225,12 +2221,6 @@ BinaryProc( *------------------------------------------------------------------------- */ -#if TCL_MAJOR_VERSION > 8 || defined(TCL_NO_DEPRECATED) -# define STOPONERROR !(flags & TCL_ENCODING_NOCOMPLAIN) -#else -# define STOPONERROR (flags & TCL_ENCODING_STOPONERROR) -#endif - static int UtfToUtfProc( ClientData clientData, /* additional flags, e.g. 
TCL_ENCODING_MODIFIED */ @@ -2313,7 +2303,7 @@ UtfToUtfProc( */ if (flags & TCL_ENCODING_MODIFIED) { - if (STOPONERROR) { + if (!(flags & TCL_ENCODING_NOCOMPLAIN)) { result = TCL_CONVERT_MULTIBYTE; break; } @@ -2328,7 +2318,7 @@ UtfToUtfProc( int low; const char *saveSrc = src; size_t len = TclUtfToUCS4(src, &ch); - if ((len < 2) && (ch != 0) && STOPONERROR + if ((len < 2) && (ch != 0) && !(flags & TCL_ENCODING_NOCOMPLAIN) && (flags & TCL_ENCODING_MODIFIED)) { result = TCL_CONVERT_SYNTAX; break; @@ -2354,7 +2344,7 @@ UtfToUtfProc( if (((low & ~0x3FF) != 0xDC00) || (ch & 0x400)) { - if (STOPONERROR) { + if (!(flags & TCL_ENCODING_NOCOMPLAIN)) { result = TCL_CONVERT_UNKNOWN; src = saveSrc; break; @@ -2369,7 +2359,7 @@ UtfToUtfProc( dst += Tcl_UniCharToUtf(ch, dst); ch = low; } else if (!Tcl_UniCharIsUnicode(ch)) { - if (STOPONERROR) { + if (!(flags & TCL_ENCODING_NOCOMPLAIN)) { result = TCL_CONVERT_UNKNOWN; src = saveSrc; break; @@ -2555,7 +2545,7 @@ UtfToUtf32Proc( } len = TclUtfToUCS4(src, &ch); if (!Tcl_UniCharIsUnicode(ch)) { - if (STOPONERROR) { + if (!(flags & TCL_ENCODING_NOCOMPLAIN)) { result = TCL_CONVERT_UNKNOWN; break; } @@ -2758,7 +2748,7 @@ UtfToUtf16Proc( } len = TclUtfToUCS4(src, &ch); if (!Tcl_UniCharIsUnicode(ch)) { - if (STOPONERROR) { + if (!(flags & TCL_ENCODING_NOCOMPLAIN)) { result = TCL_CONVERT_UNKNOWN; break; } @@ -2978,7 +2968,7 @@ TableToUtfProc( ch = pageZero[byte]; } if ((ch == 0) && (byte != 0)) { - if (STOPONERROR) { + if (!(flags & TCL_ENCODING_NOCOMPLAIN)) { result = TCL_CONVERT_SYNTAX; break; } @@ -3094,7 +3084,7 @@ TableFromUtfProc( word = fromUnicode[(ch >> 8)][ch & 0xFF]; if ((word == 0) && (ch != 0)) { - if (STOPONERROR) { + if (!(flags & TCL_ENCODING_NOCOMPLAIN)) { result = TCL_CONVERT_UNKNOWN; break; } @@ -3282,7 +3272,7 @@ Iso88591FromUtfProc( || ((ch >= 0xD800) && (len < 3)) #endif ) { - if (STOPONERROR) { + if (!(flags & TCL_ENCODING_NOCOMPLAIN)) { result = TCL_CONVERT_UNKNOWN; break; } @@ -3509,7 +3499,7 @@ EscapeToUtfProc( 
if ((checked == dataPtr->numSubTables + 2) || (flags & TCL_ENCODING_END)) { - if (!STOPONERROR) { + if (!!(flags & TCL_ENCODING_NOCOMPLAIN)) { /* * Skip the unknown escape sequence. */ @@ -3684,7 +3674,7 @@ EscapeFromUtfProc( if (word == 0) { state = oldState; - if (STOPONERROR) { + if (!(flags & TCL_ENCODING_NOCOMPLAIN)) { result = TCL_CONVERT_UNKNOWN; break; } diff --git a/generic/tclZipfs.c b/generic/tclZipfs.c index 5963f1b..4aea20c 100644 --- a/generic/tclZipfs.c +++ b/generic/tclZipfs.c @@ -737,8 +737,7 @@ DecodeZipEntryText( src = (const char *) inputBytes; dst = Tcl_DStringValue(dstPtr); dstLen = dstPtr->spaceAvl - 1; - flags = TCL_ENCODING_START | TCL_ENCODING_END | - TCL_ENCODING_STOPONERROR; /* Special flag! */ + flags = TCL_ENCODING_START | TCL_ENCODING_END; /* Special flag! */ while (1) { int srcRead, dstWrote; diff --git a/tests/chanio.test b/tests/chanio.test index 11a4e74..e668655 100644 --- a/tests/chanio.test +++ b/tests/chanio.test @@ -255,8 +255,8 @@ test chan-io-3.4 {WriteChars: loop over stage buffer} -body { chan configure $f -encoding jis0208 -buffersize 16 chan puts -nonewline $f "\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\" set x [list [contents $path(test1)]] +} -cleanup { chan close $f - lappend x [contents $path(test1)] } -errorCode {POSIX EILSEQ {illegal byte sequence}} -match glob -result {error writing "*": illegal byte sequence} test chan-io-3.5 {WriteChars: saved != 0} -body { # Bytes produced by UtfToExternal from end of last channel buffer had to @@ -266,8 +266,8 @@ test chan-io-3.5 {WriteChars: saved != 0} -body { chan configure $f -encoding jis0208 -buffersize 17 chan puts -nonewline $f "\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\" set x [list [contents $path(test1)]] +} -cleanup { chan close $f - lappend x [contents $path(test1)] } -errorCode {POSIX EILSEQ {illegal byte sequence}} -match glob -result {error writing "*": illegal byte sequence} test chan-io-3.6 {WriteChars: (stageRead + dstWrote == 0)} { # One incomplete UTF-8 character at end of staging 
buffer. Backup in src @@ -295,8 +295,8 @@ test chan-io-3.7 {WriteChars: (bufPtr->nextAdded > bufPtr->length)} -body { chan configure $f -encoding jis0208 -buffersize 17 chan puts -nonewline $f "\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\" set x [list [contents $path(test1)]] +} -cleanup { chan close $f - lappend x [contents $path(test1)] } -errorCode {POSIX EILSEQ {illegal byte sequence}} -match glob -result {error writing "*": illegal byte sequence} test chan-io-3.8 {WriteChars: reset sawLF after each buffer} { set f [open $path(test1) w] |