summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorjan.nijtmans <nijtmans@users.sourceforge.net>2022-03-17 13:44:04 (GMT)
committerjan.nijtmans <nijtmans@users.sourceforge.net>2022-03-17 13:44:04 (GMT)
commit25473372703a8ba0a8bba93c36afad63b8a9e2f6 (patch)
tree55a8af5f981022aa7202006bd5d7bb3a626e2182
parentfb7684bbbf09e0c6e6328056be102e5069ab600f (diff)
parent7056164cd356e6aed9a9290471a7029c35a606f5 (diff)
downloadtcl-25473372703a8ba0a8bba93c36afad63b8a9e2f6.zip
tcl-25473372703a8ba0a8bba93c36afad63b8a9e2f6.tar.gz
tcl-25473372703a8ba0a8bba93c36afad63b8a9e2f6.tar.bz2
Merge 9.0
-rw-r--r--doc/Encoding.321
-rw-r--r--generic/tcl.decls8
-rw-r--r--generic/tcl.h18
-rw-r--r--generic/tclCmdAH.c94
-rw-r--r--generic/tclDecls.h20
-rw-r--r--generic/tclEncoding.c136
-rw-r--r--generic/tclStubInit.c4
-rw-r--r--tests/chanio.test12
-rw-r--r--tests/cmdAH.test4
-rw-r--r--tests/encoding.test76
-rw-r--r--tests/http.test4
-rw-r--r--tests/io.test16
-rw-r--r--tests/main.test2
-rw-r--r--tests/safe.test8
-rw-r--r--tests/source.test4
-rw-r--r--win/rules.vc13
16 files changed, 344 insertions, 96 deletions
diff --git a/doc/Encoding.3 b/doc/Encoding.3
index c36744b..663cd3f 100644
--- a/doc/Encoding.3
+++ b/doc/Encoding.3
@@ -25,9 +25,15 @@ int
char *
\fBTcl_ExternalToUtfDString\fR(\fIencoding, src, srcLen, dstPtr\fR)
.sp
+size_t
+\fBTcl_ExternalToUtfDStringEx\fR(\fIencoding, src, srcLen, flags, dstPtr\fR)
+.sp
char *
\fBTcl_UtfToExternalDString\fR(\fIencoding, src, srcLen, dstPtr\fR)
.sp
+size_t
+\fBTcl_UtfToExternalDStringEx\fR(\fIencoding, src, srcLen, flags, dstPtr\fR)
+.sp
int
\fBTcl_ExternalToUtf\fR(\fIinterp, encoding, src, srcLen, flags, statePtr,
dst, dstLen, srcReadPtr, dstWrotePtr, dstCharsPtr\fR)
@@ -96,7 +102,10 @@ byte is converted and then to reset to an initial state.
\fBTCL_ENCODING_STOPONERROR\fR signifies that the conversion routine should
return immediately upon reading a source character that does not exist in
the target encoding; otherwise a default fallback character will
-automatically be substituted.
+automatically be substituted. The flag \fBTCL_ENCODING_NOCOMPLAIN\fR has
+no effect, it is reserved for Tcl 9.0. The flag \fBTCL_ENCODING_MODIFIED\fR makes
+\fBTcl_UtfToExternalDStringEx\fR and \fBTcl_UtfToExternal\fR produce the
+byte sequence \exC0\ex80 in stead of \ex00, for the utf-8/cesu-8 encoders.
.AP Tcl_EncodingState *statePtr in/out
Used when converting a (generally long or indefinite length) byte stream
in a piece-by-piece fashion. The conversion routine stores its current
@@ -196,6 +205,11 @@ When converting, if any of the characters in the source buffer cannot be
represented in the target encoding, a default fallback character will be
used. The return value is a pointer to the value stored in the DString.
.PP
+\fBTcl_ExternalToUtfDStringEx\fR is the same as \fBTcl_ExternalToUtfDString\fR,
+but it has an additional flags parameter. The return value is the index of
+the first byte in the input string causing a conversion error.
+Or (size_t)-1 if all is OK.
+.PP
\fBTcl_ExternalToUtf\fR converts a source buffer \fIsrc\fR from the specified
\fIencoding\fR into UTF-8. Up to \fIsrcLen\fR bytes are converted from the
source buffer and up to \fIdstLen\fR converted bytes are stored in \fIdst\fR.
@@ -234,6 +248,11 @@ characters in the source buffer cannot be represented in the target
encoding, a default fallback character will be used. The return value is
a pointer to the value stored in the DString.
.PP
+\fBTcl_UtfToExternalDStringEx\fR is the same as \fBTcl_UtfToExternalDString\fR,
+but it has an additional flags parameter. The return value is the index of
+the first byte of an utf-8 byte-sequence in the input string causing a
+conversion error. Or (size_t)-1 if all is OK.
+.PP
\fBTcl_UtfToExternal\fR converts a source buffer \fIsrc\fR from UTF-8 into
the specified \fIencoding\fR. Up to \fIsrcLen\fR bytes are converted from
the source buffer and up to \fIdstLen\fR converted bytes are stored in
diff --git a/generic/tcl.decls b/generic/tcl.decls
index 7d035c5..c2bbf56 100644
--- a/generic/tcl.decls
+++ b/generic/tcl.decls
@@ -2498,6 +2498,14 @@ declare 656 {
declare 657 {
int Tcl_UniCharIsUnicode(int ch)
}
+declare 658 {
+ size_t Tcl_ExternalToUtfDStringEx(Tcl_Encoding encoding,
+ const char *src, size_t srcLen, int flags, Tcl_DString *dsPtr)
+}
+declare 659 {
+ size_t Tcl_UtfToExternalDStringEx(Tcl_Encoding encoding,
+ const char *src, size_t srcLen, int flags, Tcl_DString *dsPtr)
+}
# TIP #511
declare 660 {
diff --git a/generic/tcl.h b/generic/tcl.h
index a4dec97..2757eff 100644
--- a/generic/tcl.h
+++ b/generic/tcl.h
@@ -1869,10 +1869,10 @@ typedef struct Tcl_EncodingType {
* encountering an invalid byte sequence or a
* source character that has no mapping in the
* target encoding. If clear, the converter
- * substitues the problematic character(s) with
+ * substitutes the problematic character(s) with
* one or more "close" characters in the
* destination buffer and then continues to
- * convert the source.
+ * convert the source. Only for Tcl 8.x.
* TCL_ENCODING_NO_TERMINATE - If set, Tcl_ExternalToUtf does not append a
* terminating NUL byte. Since it does not need
* an extra byte for a terminating NUL, it fills
@@ -1887,6 +1887,18 @@ typedef struct Tcl_EncodingType {
* content. Otherwise, the number of chars
* produced is controlled only by other limiting
* factors.
+ * TCL_ENCODING_MODIFIED - Convert NULL bytes to \xC0\x80 in stead of
+ * 0x00. Only valid for "utf-8" and "cesu-8".
+ * This flag is implicit for external -> internal conversions,
+ * optional for internal -> external conversions.
+ * TCL_ENCODING_NOCOMPLAIN - If set, the converter
+ * substitutes the problematic character(s) with
+ * one or more "close" characters in the
+ * destination buffer and then continues to
+ * convert the source. If clear, the converter returns
+ * immediately upon encountering an invalid byte sequence
+ * or a source character that has no mapping in the
+ * target encoding. Only for Tcl 9.x.
*/
#define TCL_ENCODING_START 0x01
@@ -1894,6 +1906,8 @@ typedef struct Tcl_EncodingType {
#define TCL_ENCODING_STOPONERROR 0x04
#define TCL_ENCODING_NO_TERMINATE 0x08
#define TCL_ENCODING_CHAR_LIMIT 0x10
+#define TCL_ENCODING_MODIFIED 0x20
+#define TCL_ENCODING_NOCOMPLAIN 0x40
/*
* The following definitions are the error codes returned by the conversion
diff --git a/generic/tclCmdAH.c b/generic/tclCmdAH.c
index d58b92c..36d9867 100644
--- a/generic/tclCmdAH.c
+++ b/generic/tclCmdAH.c
@@ -377,8 +377,8 @@ TclInitEncodingCmd(
Tcl_Interp* interp) /* Tcl interpreter */
{
static const EnsembleImplMap encodingImplMap[] = {
- {"convertfrom", EncodingConvertfromObjCmd, TclCompileBasic1Or2ArgCmd, NULL, NULL, 0},
- {"convertto", EncodingConverttoObjCmd, TclCompileBasic1Or2ArgCmd, NULL, NULL, 0},
+ {"convertfrom", EncodingConvertfromObjCmd, TclCompileBasic1To3ArgCmd, NULL, NULL, 0},
+ {"convertto", EncodingConverttoObjCmd, TclCompileBasic1To3ArgCmd, NULL, NULL, 0},
{"dirs", EncodingDirsObjCmd, TclCompileBasic0Or1ArgCmd, NULL, NULL, 1},
{"names", EncodingNamesObjCmd, TclCompileBasic0ArgCmd, NULL, NULL, 0},
{"system", EncodingSystemObjCmd, TclCompileBasic0Or1ArgCmd, NULL, NULL, 1},
@@ -414,29 +414,61 @@ EncodingConvertfromObjCmd(
Tcl_Encoding encoding; /* Encoding to use */
size_t length = 0; /* Length of the byte array being converted */
const char *bytesPtr; /* Pointer to the first byte of the array */
+#if TCL_MAJOR_VERSION > 8 || defined(TCL_NO_DEPRECATED)
+ int flags = TCL_ENCODING_STOPONERROR;
+#else
+ int flags = TCL_ENCODING_NOCOMPLAIN;
+#endif
+ size_t result;
if (objc == 2) {
encoding = Tcl_GetEncoding(interp, NULL);
data = objv[1];
- } else if (objc == 3) {
- if (Tcl_GetEncodingFromObj(interp, objv[1], &encoding) != TCL_OK) {
+ } else if ((unsigned)(objc - 2) < 3) {
+ data = objv[objc - 1];
+ bytesPtr = Tcl_GetString(objv[1]);
+ if (bytesPtr[0] == '-' && bytesPtr[1] == 'n'
+ && !strncmp(bytesPtr, "-nocomplain", strlen(bytesPtr))) {
+ flags = TCL_ENCODING_NOCOMPLAIN;
+ } else if (objc < 4) {
+ if (Tcl_GetEncodingFromObj(interp, objv[objc - 2], &encoding) != TCL_OK) {
+ return TCL_ERROR;
+ }
+ goto encConvFromOK;
+ } else {
+ goto encConvFromError;
+ }
+ if (objc < 4) {
+ encoding = Tcl_GetEncoding(interp, NULL);
+ } else if (Tcl_GetEncodingFromObj(interp, objv[objc - 2], &encoding) != TCL_OK) {
return TCL_ERROR;
}
- data = objv[2];
} else {
- Tcl_WrongNumArgs(interp, 1, objv, "?encoding? data");
+ encConvFromError:
+ Tcl_WrongNumArgs(interp, 1, objv, "?-nocomplain? ?encoding? data");
return TCL_ERROR;
}
+encConvFromOK:
/*
* Convert the string into a byte array in 'ds'
*/
bytesPtr = (char *) Tcl_GetBytesFromObj(interp, data, &length);
if (bytesPtr == NULL) {
- Tcl_FreeEncoding(encoding);
return TCL_ERROR;
}
- Tcl_ExternalToUtfDString(encoding, bytesPtr, length, &ds);
+ result = Tcl_ExternalToUtfDStringEx(encoding, bytesPtr, length,
+ flags, &ds);
+ if ((flags & TCL_ENCODING_STOPONERROR) && (result != TCL_INDEX_NONE)) {
+ char buf[TCL_INTEGER_SPACE];
+ sprintf(buf, "%" TCL_Z_MODIFIER "u", result);
+ Tcl_SetObjResult(interp, Tcl_ObjPrintf("unexpected byte sequence starting at index %"
+ TCL_Z_MODIFIER "u: '\\x%X'", result, UCHAR(bytesPtr[result])));
+ Tcl_SetErrorCode(interp, "TCL", "ENCODING", "ILLEGALSEQUENCE",
+ buf, NULL);
+ Tcl_DStringFree(&ds);
+ return TCL_ERROR;
+ }
/*
* Note that we cannot use Tcl_DStringResult here because it will
@@ -480,26 +512,62 @@ EncodingConverttoObjCmd(
Tcl_Encoding encoding; /* Encoding to use */
size_t length; /* Length of the string being converted */
const char *stringPtr; /* Pointer to the first byte of the string */
+ size_t result;
+#if TCL_MAJOR_VERSION > 8 || defined(TCL_NO_DEPRECATED)
+ int flags = TCL_ENCODING_STOPONERROR;
+#else
+ int flags = TCL_ENCODING_NOCOMPLAIN;
+#endif
if (objc == 2) {
encoding = Tcl_GetEncoding(interp, NULL);
data = objv[1];
- } else if (objc == 3) {
- if (Tcl_GetEncodingFromObj(interp, objv[1], &encoding) != TCL_OK) {
+ } else if ((unsigned)(objc - 2) < 3) {
+ data = objv[objc - 1];
+ stringPtr = Tcl_GetString(objv[1]);
+ if (stringPtr[0] == '-' && stringPtr[1] == 'n'
+ && !strncmp(stringPtr, "-nocomplain", strlen(stringPtr))) {
+ flags = TCL_ENCODING_NOCOMPLAIN;
+ } else if (objc < 4) {
+ if (Tcl_GetEncodingFromObj(interp, objv[objc - 2], &encoding) != TCL_OK) {
+ return TCL_ERROR;
+ }
+ goto encConvToOK;
+ } else {
+ goto encConvToError;
+ }
+ if (objc < 4) {
+ encoding = Tcl_GetEncoding(interp, NULL);
+ } else if (Tcl_GetEncodingFromObj(interp, objv[objc - 2], &encoding) != TCL_OK) {
return TCL_ERROR;
}
- data = objv[2];
} else {
- Tcl_WrongNumArgs(interp, 1, objv, "?encoding? data");
+ encConvToError:
+ Tcl_WrongNumArgs(interp, 1, objv, "?-nocomplain? ?encoding? data");
return TCL_ERROR;
}
+encConvToOK:
/*
* Convert the string to a byte array in 'ds'
*/
stringPtr = Tcl_GetStringFromObj(data, &length);
- Tcl_UtfToExternalDString(encoding, stringPtr, length, &ds);
+ result = Tcl_UtfToExternalDStringEx(encoding, stringPtr, length,
+ flags, &ds);
+ if ((flags & TCL_ENCODING_STOPONERROR) && (result != TCL_INDEX_NONE)) {
+ size_t pos = Tcl_NumUtfChars(stringPtr, result);
+ int ucs4;
+ char buf[TCL_INTEGER_SPACE];
+ TclUtfToUCS4(&stringPtr[result], &ucs4);
+ sprintf(buf, "%" TCL_Z_MODIFIER "u", result);
+ Tcl_SetObjResult(interp, Tcl_ObjPrintf("unexpected character at index %"
+ TCL_Z_MODIFIER "u: 'U+%06X'", pos, ucs4));
+ Tcl_SetErrorCode(interp, "TCL", "ENCODING", "ILLEGALSEQUENCE",
+ buf, NULL);
+ Tcl_DStringFree(&ds);
+ return TCL_ERROR;
+ }
Tcl_SetObjResult(interp,
Tcl_NewByteArrayObj((unsigned char*) Tcl_DStringValue(&ds),
Tcl_DStringLength(&ds)));
diff --git a/generic/tclDecls.h b/generic/tclDecls.h
index a19f781..71a4599 100644
--- a/generic/tclDecls.h
+++ b/generic/tclDecls.h
@@ -1756,8 +1756,14 @@ EXTERN const char * Tcl_UtfNext(const char *src);
EXTERN const char * Tcl_UtfPrev(const char *src, const char *start);
/* 657 */
EXTERN int Tcl_UniCharIsUnicode(int ch);
-/* Slot 658 is reserved */
-/* Slot 659 is reserved */
+/* 658 */
+EXTERN size_t Tcl_ExternalToUtfDStringEx(Tcl_Encoding encoding,
+ const char *src, size_t srcLen, int flags,
+ Tcl_DString *dsPtr);
+/* 659 */
+EXTERN size_t Tcl_UtfToExternalDStringEx(Tcl_Encoding encoding,
+ const char *src, size_t srcLen, int flags,
+ Tcl_DString *dsPtr);
/* 660 */
EXTERN int Tcl_AsyncMarkFromSignal(Tcl_AsyncHandler async,
int sigNumber);
@@ -2456,8 +2462,8 @@ typedef struct TclStubs {
const char * (*tcl_UtfNext) (const char *src); /* 655 */
const char * (*tcl_UtfPrev) (const char *src, const char *start); /* 656 */
int (*tcl_UniCharIsUnicode) (int ch); /* 657 */
- void (*reserved658)(void);
- void (*reserved659)(void);
+ size_t (*tcl_ExternalToUtfDStringEx) (Tcl_Encoding encoding, const char *src, size_t srcLen, int flags, Tcl_DString *dsPtr); /* 658 */
+ size_t (*tcl_UtfToExternalDStringEx) (Tcl_Encoding encoding, const char *src, size_t srcLen, int flags, Tcl_DString *dsPtr); /* 659 */
int (*tcl_AsyncMarkFromSignal) (Tcl_AsyncHandler async, int sigNumber); /* 660 */
int (*tcl_ListObjGetElements) (Tcl_Interp *interp, Tcl_Obj *listPtr, size_t *objcPtr, Tcl_Obj ***objvPtr); /* 661 */
int (*tcl_ListObjLength) (Tcl_Interp *interp, Tcl_Obj *listPtr, size_t *lengthPtr); /* 662 */
@@ -3734,8 +3740,10 @@ extern const TclStubs *tclStubsPtr;
(tclStubsPtr->tcl_UtfPrev) /* 656 */
#define Tcl_UniCharIsUnicode \
(tclStubsPtr->tcl_UniCharIsUnicode) /* 657 */
-/* Slot 658 is reserved */
-/* Slot 659 is reserved */
+#define Tcl_ExternalToUtfDStringEx \
+ (tclStubsPtr->tcl_ExternalToUtfDStringEx) /* 658 */
+#define Tcl_UtfToExternalDStringEx \
+ (tclStubsPtr->tcl_UtfToExternalDStringEx) /* 659 */
#define Tcl_AsyncMarkFromSignal \
(tclStubsPtr->tcl_AsyncMarkFromSignal) /* 660 */
#define Tcl_ListObjGetElements \
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c
index b637ce1..7b77282 100644
--- a/generic/tclEncoding.c
+++ b/generic/tclEncoding.c
@@ -515,10 +515,8 @@ FillEncodingFileMap(void)
*---------------------------------------------------------------------------
*/
-/* Those flags must not conflict with other TCL_ENCODING_* flags in tcl.h */
/* Since TCL_ENCODING_MODIFIED is only used for utf-8/cesu-8 and
* TCL_ENCODING_LE is only used for utf-16/utf-32/ucs-2. re-use the same value */
-#define TCL_ENCODING_MODIFIED 0x20 /* Converting NULL bytes to 0xC0 0x80 */
#define TCL_ENCODING_LE TCL_ENCODING_MODIFIED /* Little-endian encoding */
#define TCL_ENCODING_UTF 0x200 /* For UTF-8 encoding, allow 4-byte output sequences */
@@ -1080,11 +1078,57 @@ Tcl_ExternalToUtfDString(
Tcl_DString *dstPtr) /* Uninitialized or free DString in which the
* converted string is stored. */
{
+ Tcl_ExternalToUtfDStringEx(encoding, src, srcLen, 0, dstPtr);
+ return Tcl_DStringValue(dstPtr);
+}
+
+
+/*
+ *-------------------------------------------------------------------------
+ *
+ * Tcl_ExternalToUtfDStringEx --
+ *
+ * Convert a source buffer from the specified encoding into UTF-8.
+* The parameter flags controls the behavior, if any of the bytes in
+ * the source buffer are invalid or cannot be represented in utf-8.
+ * Possible flags values:
+ * TCL_ENCODING_STOPONERROR: don't replace invalid characters/bytes but
+ * return the first error position (Default in Tcl 9.0).
+ * TCL_ENCODING_NOCOMPLAIN: replace invalid characters/bytes by a default
+ * fallback character. Always return -1 (Default in Tcl 8.7).
+ * TCL_ENCODING_MODIFIED: convert NULL bytes to \xC0\x80 in stead of 0x00.
+ * Only valid for "utf-8" and "cesu-8". This flag may be used together
+ * with the other flags.
+ *
+ * Results:
+ * The converted bytes are stored in the DString, which is then NULL
+ * terminated in an encoding-specific manner. The return value is
+ * the error position in the source string or -1 if no conversion error
+ * is reported.
+ *
+ * Side effects:
+ * None.
+ *
+ *-------------------------------------------------------------------------
+ */
+
+size_t
+Tcl_ExternalToUtfDStringEx(
+ Tcl_Encoding encoding, /* The encoding for the source string, or NULL
+ * for the default system encoding. */
+ const char *src, /* Source string in specified encoding. */
+ size_t srcLen, /* Source string length in bytes, or TCL_INDEX_NONE for
+ * encoding-specific string length. */
+ int flags, /* Conversion control flags. */
+ Tcl_DString *dstPtr) /* Uninitialized or free DString in which the
+ * converted string is stored. */
+{
char *dst;
Tcl_EncodingState state;
const Encoding *encodingPtr;
- int flags, result, soFar, srcRead, dstWrote, dstChars;
+ int result, soFar, srcRead, dstWrote, dstChars;
size_t dstLen;
+ const char *srcStart = src;
Tcl_DStringInit(dstPtr);
dst = Tcl_DStringValue(dstPtr);
@@ -1101,7 +1145,7 @@ Tcl_ExternalToUtfDString(
srcLen = encodingPtr->lengthProc(src);
}
- flags = TCL_ENCODING_START | TCL_ENCODING_END;
+ flags |= TCL_ENCODING_START | TCL_ENCODING_END;
if (encodingPtr->toUtfProc == UtfToUtfProc) {
flags |= TCL_ENCODING_MODIFIED | TCL_ENCODING_UTF;
}
@@ -1114,7 +1158,7 @@ Tcl_ExternalToUtfDString(
src += srcRead;
if (result != TCL_CONVERT_NOSPACE) {
Tcl_DStringSetLength(dstPtr, soFar);
- return Tcl_DStringValue(dstPtr);
+ return (result == TCL_OK) ? TCL_INDEX_NONE : (size_t)(src - srcStart);
}
flags &= ~TCL_ENCODING_START;
srcLen -= srcRead;
@@ -1273,10 +1317,57 @@ Tcl_UtfToExternalDString(
Tcl_DString *dstPtr) /* Uninitialized or free DString in which the
* converted string is stored. */
{
+ Tcl_UtfToExternalDStringEx(encoding, src, srcLen, 0, dstPtr);
+ return Tcl_DStringValue(dstPtr);
+}
+
+
+/*
+ *-------------------------------------------------------------------------
+ *
+ * Tcl_UtfToExternalDStringEx --
+ *
+ * Convert a source buffer from UTF-8 to the specified encoding.
+ * The parameter flags controls the behavior, if any of the bytes in
+ * the source buffer are invalid or cannot be represented in the
+ * target encoding.
+ * Possible flags values:
+ * TCL_ENCODING_STOPONERROR: don't replace invalid characters/bytes but
+ * return the first error position (Default in Tcl 9.0).
+ * TCL_ENCODING_NOCOMPLAIN: replace invalid characters/bytes by a default
+ * fallback character. Always return -1 (Default in Tcl 8.7).
+ * TCL_ENCODING_MODIFIED: convert NULL bytes to \xC0\x80 in stead of 0x00.
+ * Only valid for "utf-8" and "cesu-8". This flag may be used together
+ * with the other flags.
+ *
+ * Results:
+ * The converted bytes are stored in the DString, which is then NULL
+ * terminated in an encoding-specific manner. The return value is
+ * the error position in the source string or -1 if no conversion error
+ * is reported.
+ *
+ * Side effects:
+ * None.
+ *
+ *-------------------------------------------------------------------------
+ */
+
+size_t
+Tcl_UtfToExternalDStringEx(
+ Tcl_Encoding encoding, /* The encoding for the converted string, or
+ * NULL for the default system encoding. */
+ const char *src, /* Source string in UTF-8. */
+ size_t srcLen, /* Source string length in bytes, or < 0 for
+ * strlen(). */
+ int flags, /* Conversion control flags. */
+ Tcl_DString *dstPtr) /* Uninitialized or free DString in which the
+ * converted string is stored. */
+{
char *dst;
Tcl_EncodingState state;
const Encoding *encodingPtr;
- int flags, result, soFar, srcRead, dstWrote, dstChars;
+ int result, soFar, srcRead, dstWrote, dstChars;
+ const char *srcStart = src;
size_t dstLen;
Tcl_DStringInit(dstPtr);
@@ -1293,7 +1384,7 @@ Tcl_UtfToExternalDString(
} else if (srcLen == TCL_INDEX_NONE) {
srcLen = strlen(src);
}
- flags = TCL_ENCODING_START | TCL_ENCODING_END;
+ flags |= TCL_ENCODING_START | TCL_ENCODING_END;
while (1) {
result = encodingPtr->fromUtfProc(encodingPtr->clientData, src,
srcLen, flags, &state, dst, dstLen,
@@ -1306,7 +1397,7 @@ Tcl_UtfToExternalDString(
while (i >= soFar) {
Tcl_DStringSetLength(dstPtr, i--);
}
- return Tcl_DStringValue(dstPtr);
+ return (result == TCL_OK) ? TCL_INDEX_NONE : (size_t)(src - srcStart);
}
flags &= ~TCL_ENCODING_START;
@@ -2134,6 +2225,12 @@ BinaryProc(
*-------------------------------------------------------------------------
*/
+#if TCL_MAJOR_VERSION > 8 || defined(TCL_NO_DEPRECATED)
+# define STOPONERROR !(flags & TCL_ENCODING_NOCOMPLAIN)
+#else
+# define STOPONERROR (flags & TCL_ENCODING_STOPONERROR)
+#endif
+
static int
UtfToUtfProc(
ClientData clientData, /* additional flags, e.g. TCL_ENCODING_MODIFIED */
@@ -2216,7 +2313,7 @@ UtfToUtfProc(
*/
if (flags & TCL_ENCODING_MODIFIED) {
- if (flags & TCL_ENCODING_STOPONERROR) {
+ if (STOPONERROR) {
result = TCL_CONVERT_MULTIBYTE;
break;
}
@@ -2231,7 +2328,7 @@ UtfToUtfProc(
int low;
const char *saveSrc = src;
size_t len = TclUtfToUCS4(src, &ch);
- if ((len < 2) && (ch != 0) && (flags & TCL_ENCODING_STOPONERROR)
+ if ((len < 2) && (ch != 0) && STOPONERROR
&& (flags & TCL_ENCODING_MODIFIED)) {
result = TCL_CONVERT_SYNTAX;
break;
@@ -2256,7 +2353,8 @@ UtfToUtfProc(
len = (src <= srcEnd-3) ? TclUtfToUCS4(src, &low) : 0;
if (((low & ~0x3FF) != 0xDC00) || (ch & 0x400)) {
- if (flags & TCL_ENCODING_STOPONERROR) {
+
+ if (STOPONERROR) {
result = TCL_CONVERT_UNKNOWN;
src = saveSrc;
break;
@@ -2271,7 +2369,7 @@ UtfToUtfProc(
dst += Tcl_UniCharToUtf(ch, dst);
ch = low;
} else if (!Tcl_UniCharIsUnicode(ch)) {
- if (flags & TCL_ENCODING_STOPONERROR) {
+ if (STOPONERROR) {
result = TCL_CONVERT_UNKNOWN;
src = saveSrc;
break;
@@ -2457,7 +2555,7 @@ UtfToUtf32Proc(
}
len = TclUtfToUCS4(src, &ch);
if (!Tcl_UniCharIsUnicode(ch)) {
- if (flags & TCL_ENCODING_STOPONERROR) {
+ if (STOPONERROR) {
result = TCL_CONVERT_UNKNOWN;
break;
}
@@ -2660,7 +2758,7 @@ UtfToUtf16Proc(
}
len = TclUtfToUCS4(src, &ch);
if (!Tcl_UniCharIsUnicode(ch)) {
- if (flags & TCL_ENCODING_STOPONERROR) {
+ if (STOPONERROR) {
result = TCL_CONVERT_UNKNOWN;
break;
}
@@ -2880,7 +2978,7 @@ TableToUtfProc(
ch = pageZero[byte];
}
if ((ch == 0) && (byte != 0)) {
- if (flags & TCL_ENCODING_STOPONERROR) {
+ if (STOPONERROR) {
result = TCL_CONVERT_SYNTAX;
break;
}
@@ -2996,7 +3094,7 @@ TableFromUtfProc(
word = fromUnicode[(ch >> 8)][ch & 0xFF];
if ((word == 0) && (ch != 0)) {
- if (flags & TCL_ENCODING_STOPONERROR) {
+ if (STOPONERROR) {
result = TCL_CONVERT_UNKNOWN;
break;
}
@@ -3184,7 +3282,7 @@ Iso88591FromUtfProc(
|| ((ch >= 0xD800) && (len < 3))
#endif
) {
- if (flags & TCL_ENCODING_STOPONERROR) {
+ if (STOPONERROR) {
result = TCL_CONVERT_UNKNOWN;
break;
}
@@ -3411,7 +3509,7 @@ EscapeToUtfProc(
if ((checked == dataPtr->numSubTables + 2)
|| (flags & TCL_ENCODING_END)) {
- if ((flags & TCL_ENCODING_STOPONERROR) == 0) {
+ if (!STOPONERROR) {
/*
* Skip the unknown escape sequence.
*/
@@ -3586,7 +3684,7 @@ EscapeFromUtfProc(
if (word == 0) {
state = oldState;
- if (flags & TCL_ENCODING_STOPONERROR) {
+ if (STOPONERROR) {
result = TCL_CONVERT_UNKNOWN;
break;
}
diff --git a/generic/tclStubInit.c b/generic/tclStubInit.c
index 19cf8c1..a75ef74 100644
--- a/generic/tclStubInit.c
+++ b/generic/tclStubInit.c
@@ -1451,8 +1451,8 @@ const TclStubs tclStubs = {
Tcl_UtfNext, /* 655 */
Tcl_UtfPrev, /* 656 */
Tcl_UniCharIsUnicode, /* 657 */
- 0, /* 658 */
- 0, /* 659 */
+ Tcl_ExternalToUtfDStringEx, /* 658 */
+ Tcl_UtfToExternalDStringEx, /* 659 */
Tcl_AsyncMarkFromSignal, /* 660 */
Tcl_ListObjGetElements, /* 661 */
Tcl_ListObjLength, /* 662 */
diff --git a/tests/chanio.test b/tests/chanio.test
index 2d26ac9..11a4e74 100644
--- a/tests/chanio.test
+++ b/tests/chanio.test
@@ -249,7 +249,7 @@ test chan-io-3.3 {WriteChars: compatibility with WriteBytes: flush on line} -bod
} -cleanup {
chan close $f
} -result "\r\n12"
-test chan-io-3.4 {WriteChars: loop over stage buffer} {
+test chan-io-3.4 {WriteChars: loop over stage buffer} -body {
# stage buffer maps to more than can be queued at once.
set f [open $path(test1) w]
chan configure $f -encoding jis0208 -buffersize 16
@@ -257,8 +257,8 @@ test chan-io-3.4 {WriteChars: loop over stage buffer} {
set x [list [contents $path(test1)]]
chan close $f
lappend x [contents $path(test1)]
-} [list "!)!)!)!)!)!)!)!)" "!)!)!)!)!)!)!)!)!)!)!)!)!)!)!)"]
-test chan-io-3.5 {WriteChars: saved != 0} {
+} -errorCode {POSIX EILSEQ {illegal byte sequence}} -match glob -result {error writing "*": illegal byte sequence}
+test chan-io-3.5 {WriteChars: saved != 0} -body {
# Bytes produced by UtfToExternal from end of last channel buffer had to
# be moved to beginning of next channel buffer to preserve requested
# buffersize.
@@ -268,7 +268,7 @@ test chan-io-3.5 {WriteChars: saved != 0} {
set x [list [contents $path(test1)]]
chan close $f
lappend x [contents $path(test1)]
-} [list "!)!)!)!)!)!)!)!)!" "!)!)!)!)!)!)!)!)!)!)!)!)!)!)!)"]
+} -errorCode {POSIX EILSEQ {illegal byte sequence}} -match glob -result {error writing "*": illegal byte sequence}
test chan-io-3.6 {WriteChars: (stageRead + dstWrote == 0)} {
# One incomplete UTF-8 character at end of staging buffer. Backup in src
# to the beginning of that UTF-8 character and try again.
@@ -285,7 +285,7 @@ test chan-io-3.6 {WriteChars: (stageRead + dstWrote == 0)} {
chan close $f
lappend x [contents $path(test1)]
} [list "12345678901234\x82\x60" "12345678901234\x82\x60\x82\x61"]
-test chan-io-3.7 {WriteChars: (bufPtr->nextAdded > bufPtr->length)} {
+test chan-io-3.7 {WriteChars: (bufPtr->nextAdded > bufPtr->length)} -body {
# When translating UTF-8 to external, the produced bytes went past end of
# the channel buffer. This is done on purpose - we then truncate the bytes
# at the end of the partial character to preserve the requested blocksize
@@ -297,7 +297,7 @@ test chan-io-3.7 {WriteChars: (bufPtr->nextAdded > bufPtr->length)} {
set x [list [contents $path(test1)]]
chan close $f
lappend x [contents $path(test1)]
-} [list "!)!)!)!)!)!)!)!)!" "!)!)!)!)!)!)!)!)!)!)!)!)!)!)!)"]
+} -errorCode {POSIX EILSEQ {illegal byte sequence}} -match glob -result {error writing "*": illegal byte sequence}
test chan-io-3.8 {WriteChars: reset sawLF after each buffer} {
set f [open $path(test1) w]
chan configure $f -encoding ascii -buffering line -translation lf \
diff --git a/tests/cmdAH.test b/tests/cmdAH.test
index ba2a97c..d3e46cf 100644
--- a/tests/cmdAH.test
+++ b/tests/cmdAH.test
@@ -172,7 +172,7 @@ test cmdAH-4.2 {Tcl_EncodingObjCmd} -returnCodes error -body {
} -result {unknown or ambiguous subcommand "foo": must be convertfrom, convertto, dirs, names, or system}
test cmdAH-4.3 {Tcl_EncodingObjCmd} -returnCodes error -body {
encoding convertto
-} -result {wrong # args: should be "encoding convertto ?encoding? data"}
+} -result {wrong # args: should be "encoding convertto ?-nocomplain? ?encoding? data"}
test cmdAH-4.4 {Tcl_EncodingObjCmd} -returnCodes error -body {
encoding convertto foo bar
} -result {unknown encoding "foo"}
@@ -194,7 +194,7 @@ test cmdAH-4.6 {Tcl_EncodingObjCmd} -setup {
} -result 8C
test cmdAH-4.7 {Tcl_EncodingObjCmd} -returnCodes error -body {
encoding convertfrom
-} -result {wrong # args: should be "encoding convertfrom ?encoding? data"}
+} -result {wrong # args: should be "encoding convertfrom ?-nocomplain? ?encoding? data"}
test cmdAH-4.8 {Tcl_EncodingObjCmd} -returnCodes error -body {
encoding convertfrom foo bar
} -result {unknown encoding "foo"}
diff --git a/tests/encoding.test b/tests/encoding.test
index c99dc71..fffcdd5 100644
--- a/tests/encoding.test
+++ b/tests/encoding.test
@@ -297,7 +297,7 @@ test encoding-11.11 {encoding: extended Unicode UTF-32} {
test encoding-12.1 {LoadTableEncoding: normal encoding} {
set x [encoding convertto iso8859-3 Ġ]
- append x [encoding convertto iso8859-3 Õ]
+ append x [encoding convertto -nocomplain iso8859-3 Õ]
append x [encoding convertfrom iso8859-3 Õ]
} "Õ?Ġ"
test encoding-12.2 {LoadTableEncoding: single-byte encoding} {
@@ -346,67 +346,67 @@ test encoding-15.5 {UtfToUtfProc emoji character input} {
} "4 😂"
test encoding-15.6 {UtfToUtfProc emoji character output} {
set x \uDE02\uD83D\uDE02\uD83D
- set y [encoding convertto utf-8 \uDE02\uD83D\uDE02\uD83D]
+ set y [encoding convertto -nocomplain utf-8 \uDE02\uD83D\uDE02\uD83D]
binary scan $y H* z
list [string length $y] $z
} {10 edb882f09f9882eda0bd}
test encoding-15.7 {UtfToUtfProc emoji character output} {
set x \uDE02\uD83D\uD83D
- set y [encoding convertto utf-8 \uDE02\uD83D\uD83D]
+ set y [encoding convertto -nocomplain utf-8 \uDE02\uD83D\uD83D]
binary scan $y H* z
list [string length $x] [string length $y] $z
} {3 9 edb882eda0bdeda0bd}
test encoding-15.8 {UtfToUtfProc emoji character output} {
set x \uDE02\uD83Dé
- set y [encoding convertto utf-8 \uDE02\uD83Dé]
+ set y [encoding convertto -nocomplain utf-8 \uDE02\uD83Dé]
binary scan $y H* z
list [string length $x] [string length $y] $z
} {3 8 edb882eda0bdc3a9}
test encoding-15.9 {UtfToUtfProc emoji character output} {
set x \uDE02\uD83DX
- set y [encoding convertto utf-8 \uDE02\uD83DX]
+ set y [encoding convertto -nocomplain utf-8 \uDE02\uD83DX]
binary scan $y H* z
list [string length $x] [string length $y] $z
} {3 7 edb882eda0bd58}
test encoding-15.10 {UtfToUtfProc high surrogate character output} {
set x \uDE02é
- set y [encoding convertto utf-8 \uDE02é]
+ set y [encoding convertto -nocomplain utf-8 \uDE02é]
binary scan $y H* z
list [string length $x] [string length $y] $z
} {2 5 edb882c3a9}
test encoding-15.11 {UtfToUtfProc low surrogate character output} {
set x \uDA02é
- set y [encoding convertto utf-8 \uDA02é]
+ set y [encoding convertto -nocomplain utf-8 \uDA02é]
binary scan $y H* z
list [string length $x] [string length $y] $z
} {2 5 eda882c3a9}
test encoding-15.12 {UtfToUtfProc high surrogate character output} {
set x \uDE02Y
- set y [encoding convertto utf-8 \uDE02Y]
+ set y [encoding convertto -nocomplain utf-8 \uDE02Y]
binary scan $y H* z
list [string length $x] [string length $y] $z
} {2 4 edb88259}
test encoding-15.13 {UtfToUtfProc low surrogate character output} {
set x \uDA02Y
- set y [encoding convertto utf-8 \uDA02Y]
+ set y [encoding convertto -nocomplain utf-8 \uDA02Y]
binary scan $y H* z
list [string length $x] [string length $y] $z
} {2 4 eda88259}
test encoding-15.14 {UtfToUtfProc high surrogate character output} {
set x \uDE02
- set y [encoding convertto utf-8 \uDE02]
+ set y [encoding convertto -nocomplain utf-8 \uDE02]
binary scan $y H* z
list [string length $x] [string length $y] $z
} {1 3 edb882}
test encoding-15.15 {UtfToUtfProc low surrogate character output} {
set x \uDA02
- set y [encoding convertto utf-8 \uDA02]
+ set y [encoding convertto -nocomplain utf-8 \uDA02]
binary scan $y H* z
list [string length $x] [string length $y] $z
} {1 3 eda882}
test encoding-15.16 {UtfToUtfProc: Invalid 4-byte UTF-8, see [ed29806ba]} {
set x \xF0\xA0\xA1\xC2
- set y [encoding convertfrom utf-8 \xF0\xA0\xA1\xC2]
+ set y [encoding convertfrom -nocomplain utf-8 \xF0\xA0\xA1\xC2]
list [string length $x] $y
} "4 \xF0\xA0\xA1\xC2"
test encoding-15.17 {UtfToUtfProc emoji character output} {
@@ -487,10 +487,10 @@ test encoding-17.2 {UtfToUcs2Proc} -body {
encoding convertfrom utf-16 [encoding convertto ucs-2 "\U460DC"]
} -result "\uFFFD"
test encoding-17.3 {UtfToUtf16Proc} -body {
- encoding convertto utf-16be "\uDCDC"
+ encoding convertto -nocomplain utf-16be "\uDCDC"
} -result "\xFF\xFD"
test encoding-17.4 {UtfToUtf16Proc} -body {
- encoding convertto utf-16le "\uD8D8"
+ encoding convertto -nocomplain utf-16le "\uD8D8"
} -result "\xFD\xFF"
test encoding-17.5 {UtfToUtf16Proc} -body {
encoding convertto utf-32le "\U460DC"
@@ -615,26 +615,62 @@ test encoding-24.4 {Parse valid or invalid utf-8} {
string length [encoding convertfrom utf-8 "\xC0\x80"]
} 1
test encoding-24.5 {Parse valid or invalid utf-8} {
- string length [encoding convertfrom utf-8 "\xC0\x81"]
+ string length [encoding convertfrom -nocomplain utf-8 "\xC0\x81"]
} 2
test encoding-24.6 {Parse valid or invalid utf-8} {
- string length [encoding convertfrom utf-8 "\xC1\xBF"]
+ string length [encoding convertfrom -nocomplain utf-8 "\xC1\xBF"]
} 2
test encoding-24.7 {Parse valid or invalid utf-8} {
string length [encoding convertfrom utf-8 "\xC2\x80"]
} 1
test encoding-24.8 {Parse valid or invalid utf-8} {
- string length [encoding convertfrom utf-8 "\xE0\x80\x80"]
+ string length [encoding convertfrom -nocomplain utf-8 "\xE0\x80\x80"]
} 3
test encoding-24.9 {Parse valid or invalid utf-8} {
- string length [encoding convertfrom utf-8 "\xE0\x9F\xBF"]
+ string length [encoding convertfrom -nocomplain utf-8 "\xE0\x9F\xBF"]
} 3
test encoding-24.10 {Parse valid or invalid utf-8} {
string length [encoding convertfrom utf-8 "\xE0\xA0\x80"]
} 1
test encoding-24.11 {Parse valid or invalid utf-8} {
- string length [encoding convertfrom utf-8 "\xEF\xBF\xBF"]
+ string length [encoding convertfrom -nocomplain utf-8 "\xEF\xBF\xBF"]
} 1
+test encoding-24.12 {Parse valid or invalid utf-8} -body {
+ encoding convertfrom utf-8 "\xC0\x81"
+} -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xC0'}
+test encoding-24.13 {Parse valid or invalid utf-8} -body {
+ encoding convertfrom utf-8 "\xC1\xBF"
+} -returnCodes 1 -result {unexpected byte sequence starting at index 0: '\xC1'}
+test encoding-24.14 {Parse valid or invalid utf-8} {
+ string length [encoding convertfrom utf-8 "\xC2\x80"]
+} 1
+test encoding-24.15 {Parse valid or invalid utf-8} -body {
+ encoding convertfrom utf-8 "Z\xE0\x80"
+} -returnCodes 1 -result {unexpected byte sequence starting at index 1: '\xE0'}
+test encoding-24.16 {Parse valid or invalid utf-8} -constraints testbytestring -body {
+ encoding convertto utf-8 [testbytestring "Z\u4343\x80"]
+} -returnCodes 1 -result {expected byte sequence but character 1 was '䍃€' (U+004343)}
+test encoding-24.17 {Parse valid or invalid utf-8} -constraints testbytestring -body {
+ encoding convertto utf-8 [testbytestring "Z\xE0\x80"]
+} -result "Z\xC3\xA0\xE2\x82\xAC"
+test encoding-24.18 {Parse valid or invalid utf-8} -constraints testbytestring -body {
+ encoding convertto utf-8 [testbytestring "Z\xE0\x80xxxxxx"]
+} -result "Z\xC3\xA0\xE2\x82\xACxxxxxx"
+test encoding-24.19 {Parse valid or invalid utf-8} -body {
+ encoding convertto utf-8 "ZX\uD800"
+} -returnCodes 1 -match glob -result "unexpected character at index 2: 'U+00D800'"
+test encoding-24.20 {Parse with -nocomplain but without providing encoding} {
+ string length [encoding convertfrom -nocomplain "\x20"]
+} 1
+test encoding-24.21 {Parse with -nocomplain but without providing encoding} {
+ string length [encoding convertto -nocomplain "\x20"]
+} 1
+test encoding-24.22 {Syntax error, two encodings} -body {
+ encoding convertfrom iso8859-1 utf-8 "ZX\uD800"
+} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertfrom ?-nocomplain? ?encoding? data"}
+test encoding-24.23 {Syntax error, two encodings} -body {
+ encoding convertto iso8859-1 utf-8 "ZX\uD800"
+} -returnCodes 1 -result {wrong # args: should be "::tcl::encoding::convertto ?-nocomplain? ?encoding? data"}
file delete [file join [temporaryDirectory] iso2022.txt]
@@ -792,7 +828,7 @@ test encoding-28.0 {all encodings load} -body {
if {$name ne "unicode"} {
incr count
}
- encoding convertto $name $string
+ encoding convertto -nocomplain $name $string
# discard the cached internal representation of Tcl_Encoding
# Unfortunately, without this, encoding 2-1 fails.
diff --git a/tests/http.test b/tests/http.test
index 2fd5af4..3b2963e 100644
--- a/tests/http.test
+++ b/tests/http.test
@@ -664,13 +664,11 @@ test http-7.3 {http::formatQuery} -setup {
test http-7.4 {http::formatQuery} -setup {
set enc [http::config -urlencoding]
} -body {
- # this would be reverting to http <=2.4 behavior w/o errors
- # (unknown chars become '?')
http::config -urlencoding "iso8859-1"
http::mapReply "∈"
} -cleanup {
http::config -urlencoding $enc
-} -result {%3F}
+} -errorCode {TCL ENCODING ILLEGALSEQUENCE 0} -result {unexpected character at index 0: 'U+002208'}
package require tcl::idna 1.0
diff --git a/tests/io.test b/tests/io.test
index 0ef3422..9b7a34a 100644
--- a/tests/io.test
+++ b/tests/io.test
@@ -268,7 +268,7 @@ test io-3.3 {WriteChars: compatibility with WriteBytes: flush on line} {
close $f
set x
} "\r\n12"
-test io-3.4 {WriteChars: loop over stage buffer} {
+test io-3.4 {WriteChars: loop over stage buffer} -body {
# stage buffer maps to more than can be queued at once.
set f [open $path(test1) w]
@@ -277,8 +277,8 @@ test io-3.4 {WriteChars: loop over stage buffer} {
set x [list [contents $path(test1)]]
close $f
lappend x [contents $path(test1)]
-} [list "!)!)!)!)!)!)!)!)" "!)!)!)!)!)!)!)!)!)!)!)!)!)!)!)"]
-test io-3.5 {WriteChars: saved != 0} {
+} -errorCode {POSIX EILSEQ {illegal byte sequence}} -match glob -result {error writing "*": illegal byte sequence}
+test io-3.5 {WriteChars: saved != 0} -body {
# Bytes produced by UtfToExternal from end of last channel buffer
# had to be moved to beginning of next channel buffer to preserve
# requested buffersize.
@@ -289,7 +289,7 @@ test io-3.5 {WriteChars: saved != 0} {
set x [list [contents $path(test1)]]
close $f
lappend x [contents $path(test1)]
-} [list "!)!)!)!)!)!)!)!)!" "!)!)!)!)!)!)!)!)!)!)!)!)!)!)!)"]
+} -errorCode {POSIX EILSEQ {illegal byte sequence}} -match glob -result {error writing "*": illegal byte sequence}
test io-3.6 {WriteChars: (stageRead + dstWrote == 0)} {
# One incomplete UTF-8 character at end of staging buffer. Backup
# in src to the beginning of that UTF-8 character and try again.
@@ -307,7 +307,7 @@ test io-3.6 {WriteChars: (stageRead + dstWrote == 0)} {
close $f
lappend x [contents $path(test1)]
} [list "12345678901234\x82\x60" "12345678901234\x82\x60\x82\x61"]
-test io-3.7 {WriteChars: (bufPtr->nextAdded > bufPtr->length)} {
+test io-3.7 {WriteChars: (bufPtr->nextAdded > bufPtr->length)} -body {
# When translating UTF-8 to external, the produced bytes went past end
# of the channel buffer. This is done purpose -- we then truncate the
# bytes at the end of the partial character to preserve the requested
@@ -320,7 +320,7 @@ test io-3.7 {WriteChars: (bufPtr->nextAdded > bufPtr->length)} {
set x [list [contents $path(test1)]]
close $f
lappend x [contents $path(test1)]
-} [list "!)!)!)!)!)!)!)!)!" "!)!)!)!)!)!)!)!)!)!)!)!)!)!)!)"]
+} -errorCode {POSIX EILSEQ {illegal byte sequence}} -match glob -result {error writing "*": illegal byte sequence}
test io-3.8 {WriteChars: reset sawLF after each buffer} {
set f [open $path(test1) w]
fconfigure $f -encoding ascii -buffering line -translation lf \
@@ -1532,7 +1532,7 @@ test io-12.8 {ReadChars: multibyte chars split} {
close $f
scan [string index $in end] %c
} 160
-test io-12.9 {ReadChars: multibyte chars split} {
+test io-12.9 {ReadChars: multibyte chars split} knownBug {
set f [open $path(test1) w]
fconfigure $f -translation binary
puts -nonewline $f [string repeat a 9]\xC2
@@ -1543,7 +1543,7 @@ test io-12.9 {ReadChars: multibyte chars split} {
close $f
scan [string index $in end] %c
} 194
-test io-12.10 {ReadChars: multibyte chars split} {
+test io-12.10 {ReadChars: multibyte chars split} knownBug {
set f [open $path(test1) w]
fconfigure $f -translation binary
puts -nonewline $f [string repeat a 9]\xC2
diff --git a/tests/main.test b/tests/main.test
index 2d3f63c..4aadd79 100644
--- a/tests/main.test
+++ b/tests/main.test
@@ -153,7 +153,7 @@ namespace eval ::tcl::test::main {
puts -nonewline $f {puts [string equal \u20ac }
puts $f "€]"
close $f
- catch {set f [open "|[list [interpreter] -encoding ascii script]" r]}
+ catch {set f [open "|[list [interpreter] -encoding iso8859-1 script]" r]}
} -body {
read $f
} -cleanup {
diff --git a/tests/safe.test b/tests/safe.test
index 773b16f..5f3eae8 100644
--- a/tests/safe.test
+++ b/tests/safe.test
@@ -1269,7 +1269,7 @@ test safe-11.7 {testing safe encoding} -setup {
interp eval $i encoding convertfrom
} -returnCodes error -cleanup {
safe::interpDelete $i
-} -result {wrong # args: should be "encoding convertfrom ?encoding? data"}
+} -result {wrong # args: should be "encoding convertfrom ?-nocomplain? ?encoding? data"}
test safe-11.7.1 {testing safe encoding} -setup {
set i [safe::interpCreate]
} -body {
@@ -1278,7 +1278,7 @@ test safe-11.7.1 {testing safe encoding} -setup {
} -returnCodes ok -match glob -cleanup {
unset -nocomplain m o
safe::interpDelete $i
-} -result {wrong # args: should be "encoding convertfrom ?encoding? data"
+} -result {wrong # args: should be "encoding convertfrom ?-nocomplain? ?encoding? data"
while executing
"encoding convertfrom"
invoked from within
@@ -1291,7 +1291,7 @@ test safe-11.8 {testing safe encoding} -setup {
interp eval $i encoding convertto
} -returnCodes error -cleanup {
safe::interpDelete $i
-} -result {wrong # args: should be "encoding convertto ?encoding? data"}
+} -result {wrong # args: should be "encoding convertto ?-nocomplain? ?encoding? data"}
test safe-11.8.1 {testing safe encoding} -setup {
set i [safe::interpCreate]
} -body {
@@ -1300,7 +1300,7 @@ test safe-11.8.1 {testing safe encoding} -setup {
} -returnCodes ok -match glob -cleanup {
unset -nocomplain m o
safe::interpDelete $i
-} -result {wrong # args: should be "encoding convertto ?encoding? data"
+} -result {wrong # args: should be "encoding convertto ?-nocomplain? ?encoding? data"
while executing
"encoding convertto"
invoked from within
diff --git a/tests/source.test b/tests/source.test
index c436317..98aaee2 100644
--- a/tests/source.test
+++ b/tests/source.test
@@ -283,11 +283,11 @@ test source-7.6 {source -encoding: mismatch encoding error} -setup {
puts $f "proc € {} {return foo}"
close $f
} -body {
- source -encoding ascii $sourcefile
+ source -encoding iso8859-1 $sourcefile
} -cleanup {
removeFile source.file
-} -returnCodes error -match glob -result {invalid command name*}
+} -returnCodes error -result {invalid command name "€"}
test source-8.1 {source and coroutine/yield} -setup {
set sourcefile [makeFile {} source.file]
diff --git a/win/rules.vc b/win/rules.vc
index 713e7f9..3107756 100644
--- a/win/rules.vc
+++ b/win/rules.vc
@@ -816,8 +816,7 @@ DOTSEPARATED=$(DOTSEPARATED:b=.)
# configuration (ignored for Tcl itself)
# _USE_64BIT_TIME_T - forces a build using 64-bit time_t for 32-bit build
# (CRT library should support this, not needed for Tcl 9.x)
-# TCL_UTF_MAX=4 - forces a build allowing 4-byte UTF-8 sequences internally.
-# (Not needed for Tcl 9.x)
+# TCL_UTF_MAX=3 - forces a build using UTF-16 internally (not recommended).
# Further, LINKERFLAGS are modified based on above.
# Default values for all the above
@@ -1423,13 +1422,13 @@ OPTDEFINES = $(OPTDEFINES) /DNO_STRTOI64=1
!if "$(_USE_64BIT_TIME_T)" == "1"
OPTDEFINES = $(OPTDEFINES) /D_USE_64BIT_TIME_T=1
!endif
-!if "$(TCL_UTF_MAX)" == "4"
-OPTDEFINES = $(OPTDEFINES) /DTCL_UTF_MAX=4
-!endif
# _ATL_XP_TARGETING - Newer SDK's need this to build for XP
COMPILERFLAGS = /D_ATL_XP_TARGETING
!endif
+!if "$(TCL_UTF_MAX)" == "3"
+OPTDEFINES = $(OPTDEFINES) /DTCL_UTF_MAX=3
+!endif
# Like the TEA system only set this non empty for non-Tk extensions
# Note: some extensions use PACKAGE_NAME and others use PACKAGE_TCLNAME
@@ -1471,8 +1470,8 @@ cdebug = $(cdebug) -Zi
!endif # $(DEBUG)
-# cwarn includes default warning levels.
-cwarn = $(WARNINGS)
+# cwarn includes default warning levels, also C4146 is useless.
+cwarn = $(WARNINGS) -wd4146
!if "$(MACHINE)" == "AMD64" || "$(MACHINE)" == "ARM64"
# Disable pointer<->int warnings related to cast between different sizes