summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--generic/tcl.decls14
-rw-r--r--generic/tcl.h28
-rw-r--r--generic/tclCmdAH.c426
-rw-r--r--generic/tclDecls.h28
-rw-r--r--generic/tclEncoding.c666
-rw-r--r--generic/tclIO.c149
-rw-r--r--generic/tclIO.h5
-rw-r--r--generic/tclInt.h14
-rw-r--r--generic/tclUtil.c9
-rw-r--r--generic/tclZlib.c8
-rw-r--r--unix/tclUnixChan.c4
-rw-r--r--unix/tclUnixFCmd.c46
-rw-r--r--unix/tclUnixFile.c18
-rw-r--r--unix/tclUnixInit.c2
-rw-r--r--win/tclWinSock.c4
15 files changed, 889 insertions, 532 deletions
diff --git a/generic/tcl.decls b/generic/tcl.decls
index 3778de6..1608a88 100644
--- a/generic/tcl.decls
+++ b/generic/tcl.decls
@@ -2500,13 +2500,17 @@ declare 656 {
declare 657 {
int Tcl_UniCharIsUnicode(int ch)
}
+
+# TIP 656
declare 658 {
- Tcl_Size Tcl_ExternalToUtfDStringEx(Tcl_Encoding encoding,
- const char *src, Tcl_Size srcLen, int flags, Tcl_DString *dsPtr)
-}
+ int Tcl_ExternalToUtfDStringEx(Tcl_Interp *interp, Tcl_Encoding encoding,
+ const char *src, Tcl_Size srcLen, int flags, Tcl_DString *dsPtr,
+ Tcl_Size *errorLocationPtr)
+}
declare 659 {
- Tcl_Size Tcl_UtfToExternalDStringEx(Tcl_Encoding encoding,
- const char *src, Tcl_Size srcLen, int flags, Tcl_DString *dsPtr)
+ int Tcl_UtfToExternalDStringEx(Tcl_Interp *interp, Tcl_Encoding encoding,
+ const char *src, Tcl_Size srcLen, int flags, Tcl_DString *dsPtr,
+ Tcl_Size *errorLocationPtr)
}
# TIP #511
diff --git a/generic/tcl.h b/generic/tcl.h
index fa4da26..6040099 100644
--- a/generic/tcl.h
+++ b/generic/tcl.h
@@ -1948,14 +1948,8 @@ typedef struct Tcl_EncodingType {
* 0x00. Only valid for "utf-8" and "cesu-8".
* This flag is implicit for external -> internal conversions,
* optional for internal -> external conversions.
- * TCL_ENCODING_NOCOMPLAIN - If set, the converter
- * substitutes the problematic character(s) with
- * one or more "close" characters in the
- * destination buffer and then continues to
- * convert the source. If clear, the converter returns
- * immediately upon encountering an invalid byte sequence
- * or a source character that has no mapping in the
- * target encoding. Only for Tcl 9.x.
+ * TCL_ENCODING_PROFILE_* - Mutually exclusive encoding profile ids. Note
+ * these are bit masks.
*/
#define TCL_ENCODING_START 0x01
@@ -1970,7 +1964,23 @@ typedef struct Tcl_EncodingType {
#define TCL_ENCODING_NO_TERMINATE 0x08
#define TCL_ENCODING_CHAR_LIMIT 0x10
#define TCL_ENCODING_MODIFIED 0x20
-#define TCL_ENCODING_NOCOMPLAIN 0x40
+/* Reserve top byte for profile values (disjoint) */
+#define TCL_ENCODING_PROFILE_TCL8 0x01000000
+#define TCL_ENCODING_PROFILE_STRICT 0x02000000
+#define TCL_ENCODING_PROFILE_REPLACE 0x03000000
+#define TCL_ENCODING_PROFILE_MASK 0xFF000000
+#define TCL_ENCODING_PROFILE_GET(flags_) ((flags_) & TCL_ENCODING_PROFILE_MASK)
+#define TCL_ENCODING_PROFILE_SET(flags_, profile_) \
+ do { \
+ (flags_) &= ~TCL_ENCODING_PROFILE_MASK; \
+ (flags_) |= profile_; \
+ } while (0)
+/* Still being argued - For Tcl9, is the default strict? TODO */
+#if TCL_MAJOR_VERSION < 9
+#define TCL_ENCODING_PROFILE_DEFAULT TCL_ENCODING_PROFILE_TCL8
+#else
+#define TCL_ENCODING_PROFILE_DEFAULT TCL_ENCODING_PROFILE_TCL8 /* STRICT? REPLACE? TODO */
+#endif
/*
* The following definitions are the error codes returned by the conversion
diff --git a/generic/tclCmdAH.c b/generic/tclCmdAH.c
index 4df1216..c60a077 100644
--- a/generic/tclCmdAH.c
+++ b/generic/tclCmdAH.c
@@ -51,6 +51,7 @@ static Tcl_ObjCmdProc EncodingConvertfromObjCmd;
static Tcl_ObjCmdProc EncodingConverttoObjCmd;
static Tcl_ObjCmdProc EncodingDirsObjCmd;
static Tcl_ObjCmdProc EncodingNamesObjCmd;
+static Tcl_ObjCmdProc EncodingProfilesObjCmd;
static Tcl_ObjCmdProc EncodingSystemObjCmd;
static inline int ForeachAssignments(Tcl_Interp *interp,
struct ForeachState *statePtr);
@@ -386,6 +387,7 @@ TclInitEncodingCmd(
{"convertto", EncodingConverttoObjCmd, TclCompileBasic1To3ArgCmd, NULL, NULL, 0},
{"dirs", EncodingDirsObjCmd, TclCompileBasic0Or1ArgCmd, NULL, NULL, 1},
{"names", EncodingNamesObjCmd, TclCompileBasic0ArgCmd, NULL, NULL, 0},
+ {"profiles", EncodingProfilesObjCmd, TclCompileBasic0ArgCmd, NULL, NULL, 0},
{"system", EncodingSystemObjCmd, TclCompileBasic0Or1ArgCmd, NULL, NULL, 1},
{NULL, NULL, NULL, NULL, NULL, 0}
};
@@ -394,6 +396,121 @@ TclInitEncodingCmd(
}
/*
+ *------------------------------------------------------------------------
+ *
+ * EncodingConvertParseOptions --
+ *
+ * Common routine for parsing arguments passed to encoding convertfrom
+ * and encoding convertto.
+ *
+ * Results:
+ * TCL_OK or TCL_ERROR.
+ *
+ * Side effects:
+ * On success,
+ * - *encPtr is set to the encoding. Must be freed with Tcl_FreeEncoding
+ * if non-NULL
+ * - *dataObjPtr is set to the Tcl_Obj containing the data to encode or
+ * decode
+ * - *profilePtr is set to encoding error handling profile
+ * - *failVarPtr is set to -failindex option value or NULL
+ * On error, all of the above are uninitialized.
+ *
+ *------------------------------------------------------------------------
+ */
+static int
+EncodingConvertParseOptions (
+ Tcl_Interp *interp, /* For error messages. May be NULL */
+ int objc, /* Number of arguments */
+ Tcl_Obj *const objv[], /* Argument objects as passed to command. */
+ Tcl_Encoding *encPtr, /* Where to store the encoding */
+ Tcl_Obj **dataObjPtr, /* Where to store ptr to Tcl_Obj containing data */
+ int *profilePtr, /* Bit mask of encoding option profile */
+ Tcl_Obj **failVarPtr /* Where to store -failindex option value */
+)
+{
+ static const char *const options[] = {"-profile", "-failindex", NULL};
+ enum convertfromOptions { PROFILE, FAILINDEX } optIndex;
+ Tcl_Encoding encoding;
+ Tcl_Obj *dataObj;
+ Tcl_Obj *failVarObj;
+#if TCL_MAJOR_VERSION > 8 || defined(TCL_NO_DEPRECATED)
+ int profile = TCL_ENCODING_PROFILE_TCL8; /* TODO - default for Tcl9? */
+#else
+ int profile = TCL_ENCODING_PROFILE_TCL8;
+#endif
+
+ /*
+ * Possible combinations:
+ * 1) data -> objc = 2
+ * 2) ?options? encoding data -> objc >= 3
+ * It is intentional that specifying option forces encoding to be
+ * specified. Less prone to user error. This should have always been
+ * the case even in 8.6 imho where there were no options (ie (1)
+ * should never have been allowed)
+ */
+
+ if (objc == 1) {
+numArgsError: /* ONLY jump here if nothing needs to be freed!!! */
+ Tcl_WrongNumArgs(interp,
+ 1,
+ objv,
+ "?-profile profile? ?-failindex var? encoding data");
+ ((Interp *)interp)->flags |= INTERP_ALTERNATE_WRONG_ARGS;
+ Tcl_WrongNumArgs(interp, 1, objv, "data");
+ return TCL_ERROR;
+ }
+
+ failVarObj = NULL;
+ if (objc == 2) {
+ encoding = Tcl_GetEncoding(interp, NULL);
+ dataObj = objv[1];
+ } else {
+ int argIndex;
+ for (argIndex = 1; argIndex < (objc-2); ++argIndex) {
+ if (Tcl_GetIndexFromObj(
+ interp, objv[argIndex], options, "option", 0, &optIndex)
+ != TCL_OK) {
+ return TCL_ERROR;
+ }
+ if (++argIndex == (objc - 2)) {
+ goto numArgsError;
+ }
+ switch (optIndex) {
+ case PROFILE:
+ if (TclEncodingProfileNameToId(
+ interp, Tcl_GetString(objv[argIndex]), &profile)
+ != TCL_OK) {
+ return TCL_ERROR;
+ }
+#ifdef NOTNEEDED
+ /* TODO - next line probably not needed as the conversion
+ functions already take care of mapping profile to flags */
+ profile = TclEncodingExternalFlagsToInternal(profile);
+#endif
+ break;
+ case FAILINDEX:
+ failVarObj = objv[argIndex];
+ break;
+ }
+ }
+ /* Get encoding after opts so no need to free it on option error */
+ if (Tcl_GetEncodingFromObj(interp, objv[objc - 2], &encoding)
+ != TCL_OK) {
+ return TCL_ERROR;
+ }
+ dataObj = objv[objc - 1];
+ }
+
+ *encPtr = encoding;
+ *dataObjPtr = dataObj;
+ *profilePtr = profile;
+ *failVarPtr = failVarObj;
+
+ return TCL_OK;
+}
+
+/*
*----------------------------------------------------------------------
*
* EncodingConvertfromObjCmd --
@@ -419,113 +536,65 @@ EncodingConvertfromObjCmd(
Tcl_Encoding encoding; /* Encoding to use */
size_t length = 0; /* Length of the byte array being converted */
const char *bytesPtr; /* Pointer to the first byte of the array */
- int flags = 0;
- size_t result;
- Tcl_Obj *failVarObj = NULL;
- /*
- * Decode parameters:
- * Possible combinations:
- * 1) data -> objc = 2
- * 2) encoding data -> objc = 3
- * 3) -nocomplain data -> objc = 3
- * 4) -nocomplain encoding data -> objc = 4
- * 5) -strict data -> objc = 3
- * 6) -strict encoding data -> objc = 4
- * 7) -failindex val data -> objc = 4
- * 8) -failindex val encoding data -> objc = 5
- */
+ int flags;
+ int result;
+ Tcl_Obj *failVarObj;
+ Tcl_Size errorLocation;
- if (objc == 2) {
- encoding = Tcl_GetEncoding(interp, NULL);
- data = objv[1];
- } else if (objc > 2 && objc < 7) {
- int objcUnprocessed = objc;
- data = objv[objc - 1];
- bytesPtr = Tcl_GetString(objv[1]);
- if (bytesPtr[0] == '-' && bytesPtr[1] == 'n'
- && !strncmp(bytesPtr, "-nocomplain", strlen(bytesPtr))) {
- flags = TCL_ENCODING_NOCOMPLAIN;
- objcUnprocessed--;
- } else if (bytesPtr[0] == '-' && bytesPtr[1] == 's'
- && !strncmp(bytesPtr, "-strict", strlen(bytesPtr))) {
- flags = TCL_ENCODING_STRICT;
- objcUnprocessed--;
- bytesPtr = Tcl_GetString(objv[2]);
- if (bytesPtr[0] == '-' && bytesPtr[1] == 'f'
- && !strncmp(bytesPtr, "-failindex", strlen(bytesPtr))) {
- /* at least two additional arguments needed */
- if (objc < 6) {
- goto encConvFromError;
- }
- failVarObj = objv[3];
- objcUnprocessed -= 2;
- }
- } else if (bytesPtr[0] == '-' && bytesPtr[1] == 'f'
- && !strncmp(bytesPtr, "-failindex", strlen(bytesPtr))) {
- /* at least two additional arguments needed */
- if (objc < 4) {
- goto encConvFromError;
- }
- failVarObj = objv[2];
- flags = ENCODING_FAILINDEX;
- objcUnprocessed -= 2;
- bytesPtr = Tcl_GetString(objv[3]);
- if (bytesPtr[0] == '-' && bytesPtr[1] == 's'
- && !strncmp(bytesPtr, "-strict", strlen(bytesPtr))) {
- flags = TCL_ENCODING_STRICT;
- objcUnprocessed --;
- }
- }
- switch (objcUnprocessed) {
- case 3:
- if (Tcl_GetEncodingFromObj(interp, objv[objc - 2], &encoding) != TCL_OK) {
- return TCL_ERROR;
- }
- break;
- case 2:
- encoding = Tcl_GetEncoding(interp, NULL);
- break;
- default:
- goto encConvFromError;
- }
- } else {
- encConvFromError:
- Tcl_WrongNumArgs(interp, 1, objv, "?-strict? ?-failindex var? ?encoding? data");
- ((Interp *) interp)->flags |= INTERP_ALTERNATE_WRONG_ARGS;
- Tcl_WrongNumArgs(interp, 1, objv, "-nocomplain ?encoding? data");
+ if (EncodingConvertParseOptions(
+ interp, objc, objv, &encoding, &data, &flags, &failVarObj)
+ != TCL_OK) {
return TCL_ERROR;
}
/*
- * Convert the string into a byte array in 'ds'
+ * Convert the string into a byte array in 'ds'.
*/
bytesPtr = (char *) Tcl_GetBytesFromObj(interp, data, &length);
+
if (bytesPtr == NULL) {
return TCL_ERROR;
}
- result = Tcl_ExternalToUtfDStringEx(encoding, bytesPtr, length,
- flags, &ds);
- if ((!(flags & TCL_ENCODING_NOCOMPLAIN) || ((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT)) && (result != TCL_INDEX_NONE)) {
- if (failVarObj != NULL) {
- if (Tcl_ObjSetVar2(interp, failVarObj, NULL, Tcl_NewWideIntObj(result), TCL_LEAVE_ERR_MSG) == NULL) {
- return TCL_ERROR;
- }
- } else {
- char buf[TCL_INTEGER_SPACE];
- sprintf(buf, "%" TCL_Z_MODIFIER "u", result);
- Tcl_SetObjResult(interp, Tcl_ObjPrintf("unexpected byte sequence starting at index %"
- TCL_Z_MODIFIER "u: '\\x%X'", result, UCHAR(bytesPtr[result])));
- Tcl_SetErrorCode(interp, "TCL", "ENCODING", "ILLEGALSEQUENCE",
- buf, NULL);
- Tcl_DStringFree(&ds);
+ result = Tcl_ExternalToUtfDStringEx(interp, encoding, bytesPtr, length, flags,
+ &ds, failVarObj ? &errorLocation : NULL);
+ /* NOTE: ds must be freed beyond this point even on error */
+ switch (result) {
+ case TCL_OK:
+ errorLocation = TCL_INDEX_NONE;
+ break;
+ case TCL_ERROR:
+ /* Error in parameters. Should not happen. interp will have error */
+ Tcl_DStringFree(&ds);
+ return TCL_ERROR;
+ default:
+ /*
+ * One of the TCL_CONVERT_* errors. If we were not interested in the
+ * error location, interp result would already have been filled in
+ * and we can just return the error. Otherwise, we have to return
+ * what could be decoded and the returned error location.
+ */
+ if (failVarObj == NULL) {
+ Tcl_DStringFree(&ds);
return TCL_ERROR;
}
- } else if (failVarObj != NULL) {
- if (Tcl_ObjSetVar2(interp, failVarObj, NULL, Tcl_NewIntObj(-1), TCL_LEAVE_ERR_MSG) == NULL) {
+ break;
+ }
+
+ /*
+ * TCL_OK or a TCL_CONVERT_* error where the caller wants back as much
+ * data as was converted.
+ */
+ if (failVarObj) {
+ /* I hope, wide int will cover Tcl_Size data type */
+ if (Tcl_ObjSetVar2(interp,
+ failVarObj,
+ NULL,
+ Tcl_NewWideIntObj(errorLocation),
+ TCL_LEAVE_ERR_MSG) == NULL) {
+ Tcl_DStringFree(&ds);
return TCL_ERROR;
}
}
-
/*
* Note that we cannot use Tcl_DStringResult here because it will
* truncate the string at the first null byte.
@@ -533,9 +602,7 @@ EncodingConvertfromObjCmd(
Tcl_SetObjResult(interp, Tcl_DStringToObj(&ds));
- /*
- * We're done with the encoding
- */
+ /* We're done with the encoding */
Tcl_FreeEncoding(encoding);
return TCL_OK;
@@ -568,80 +635,14 @@ EncodingConverttoObjCmd(
Tcl_Encoding encoding; /* Encoding to use */
size_t length; /* Length of the string being converted */
const char *stringPtr; /* Pointer to the first byte of the string */
- size_t result;
- int flags = 0;
- Tcl_Obj *failVarObj = NULL;
-
- /*
- * Decode parameters:
- * Possible combinations:
- * 1) data -> objc = 2
- * 2) encoding data -> objc = 3
- * 3) -nocomplain data -> objc = 3
- * 4) -nocomplain encoding data -> objc = 4
- * 5) -failindex val data -> objc = 4
- * 6) -failindex val encoding data -> objc = 5
- */
-
- if (objc == 2) {
- encoding = Tcl_GetEncoding(interp, NULL);
- data = objv[1];
- } else if (objc > 2 && objc < 7) {
- int objcUnprocessed = objc;
- data = objv[objc - 1];
- stringPtr = Tcl_GetString(objv[1]);
- if (stringPtr[0] == '-' && stringPtr[1] == 'n'
- && !strncmp(stringPtr, "-nocomplain", strlen(stringPtr))) {
- flags = TCL_ENCODING_NOCOMPLAIN;
- objcUnprocessed--;
- } else if (stringPtr[0] == '-' && stringPtr[1] == 's'
- && !strncmp(stringPtr, "-strict", strlen(stringPtr))) {
- flags = TCL_ENCODING_STRICT;
- objcUnprocessed--;
- stringPtr = Tcl_GetString(objv[2]);
- if (stringPtr[0] == '-' && stringPtr[1] == 'f'
- && !strncmp(stringPtr, "-failindex", strlen(stringPtr))) {
- /* at least two additional arguments needed */
- if (objc < 6) {
- goto encConvToError;
- }
- failVarObj = objv[3];
- objcUnprocessed -= 2;
- }
- } else if (stringPtr[0] == '-' && stringPtr[1] == 'f'
- && !strncmp(stringPtr, "-failindex", strlen(stringPtr))) {
- /* at least two additional arguments needed */
- if (objc < 4) {
- goto encConvToError;
- }
- failVarObj = objv[2];
- flags = TCL_ENCODING_STOPONERROR;
- objcUnprocessed -= 2;
- stringPtr = Tcl_GetString(objv[3]);
- if (stringPtr[0] == '-' && stringPtr[1] == 's'
- && !strncmp(stringPtr, "-strict", strlen(stringPtr))) {
- flags = TCL_ENCODING_STRICT;
- objcUnprocessed --;
- }
- }
- switch (objcUnprocessed) {
- case 3:
- if (Tcl_GetEncodingFromObj(interp, objv[objc - 2], &encoding) != TCL_OK) {
- return TCL_ERROR;
- }
- break;
- case 2:
- encoding = Tcl_GetEncoding(interp, NULL);
- break;
- default:
- goto encConvToError;
- }
- } else {
- encConvToError:
- Tcl_WrongNumArgs(interp, 1, objv, "?-strict? ?-failindex var? ?encoding? data");
- ((Interp *) interp)->flags |= INTERP_ALTERNATE_WRONG_ARGS;
- Tcl_WrongNumArgs(interp, 1, objv, "-nocomplain ?encoding? data");
+ int result;
+ int flags;
+ Tcl_Obj *failVarObj;
+ Tcl_Size errorLocation;
+ if (EncodingConvertParseOptions(
+ interp, objc, objv, &encoding, &data, &flags, &failVarObj)
+ != TCL_OK) {
return TCL_ERROR;
}
@@ -650,40 +651,53 @@ EncodingConverttoObjCmd(
*/
stringPtr = Tcl_GetStringFromObj(data, &length);
- result = Tcl_UtfToExternalDStringEx(encoding, stringPtr, length,
- flags, &ds);
- if ((!(flags & TCL_ENCODING_NOCOMPLAIN) || ((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT)) && (result != TCL_INDEX_NONE)) {
- if (failVarObj != NULL) {
- /* I hope, wide int will cover size_t data type */
- if (Tcl_ObjSetVar2(interp, failVarObj, NULL, Tcl_NewWideIntObj(result), TCL_LEAVE_ERR_MSG) == NULL) {
- return TCL_ERROR;
- }
- } else {
- size_t pos = Tcl_NumUtfChars(stringPtr, result);
- int ucs4;
- char buf[TCL_INTEGER_SPACE];
- TclUtfToUCS4(&stringPtr[result], &ucs4);
- sprintf(buf, "%" TCL_Z_MODIFIER "u", result);
- Tcl_SetObjResult(interp, Tcl_ObjPrintf("unexpected character at index %"
- TCL_Z_MODIFIER "u: 'U+%06X'", pos, ucs4));
- Tcl_SetErrorCode(interp, "TCL", "ENCODING", "ILLEGALSEQUENCE",
- buf, NULL);
- Tcl_DStringFree(&ds);
+ result = Tcl_UtfToExternalDStringEx(interp, encoding, stringPtr, length, flags,
+ &ds, failVarObj ? &errorLocation : NULL);
+ /* NOTE: ds must be freed beyond this point even on error */
+
+ switch (result) {
+ case TCL_OK:
+ errorLocation = TCL_INDEX_NONE;
+ break;
+ case TCL_ERROR:
+ /* Error in parameters. Should not happen. interp will have error */
+ Tcl_DStringFree(&ds);
+ return TCL_ERROR;
+ default:
+ /*
+ * One of the TCL_CONVERT_* errors. If we were not interested in the
+ * error location, interp result would already have been filled in
+ * and we can just return the error. Otherwise, we have to return
+ * what could be decoded and the returned error location.
+ */
+ if (failVarObj == NULL) {
+ Tcl_DStringFree(&ds);
return TCL_ERROR;
}
- } else if (failVarObj != NULL) {
- if (Tcl_ObjSetVar2(interp, failVarObj, NULL, Tcl_NewIntObj(-1), TCL_LEAVE_ERR_MSG) == NULL) {
+ break;
+ }
+ /*
+ * TCL_OK or a TCL_CONVERT_* error where the caller wants back as much
+ * data as was converted.
+ */
+ if (failVarObj) {
+ /* I hope, wide int will cover Tcl_Size data type */
+ if (Tcl_ObjSetVar2(interp,
+ failVarObj,
+ NULL,
+ Tcl_NewWideIntObj(errorLocation),
+ TCL_LEAVE_ERR_MSG) == NULL) {
+ Tcl_DStringFree(&ds);
return TCL_ERROR;
}
}
+
Tcl_SetObjResult(interp,
Tcl_NewByteArrayObj((unsigned char*) Tcl_DStringValue(&ds),
Tcl_DStringLength(&ds)));
Tcl_DStringFree(&ds);
- /*
- * We're done with the encoding
- */
+ /* We're done with the encoding */
Tcl_FreeEncoding(encoding);
return TCL_OK;
@@ -768,6 +782,34 @@ EncodingNamesObjCmd(
/*
*-----------------------------------------------------------------------------
*
+ * EncodingProfilesObjCmd --
+ *
+ * This command returns a list of the available encoding profiles
+ *
+ * Results:
+ * Returns a standard Tcl result
+ *
+ *-----------------------------------------------------------------------------
+ */
+
+int
+EncodingProfilesObjCmd(
+ TCL_UNUSED(void *),
+ Tcl_Interp* interp, /* Tcl interpreter */
+ int objc, /* Number of command line args */
+ Tcl_Obj* const objv[]) /* Vector of command line args */
+{
+ if (objc > 1) {
+ Tcl_WrongNumArgs(interp, 1, objv, NULL);
+ return TCL_ERROR;
+ }
+ TclGetEncodingProfiles(interp);
+ return TCL_OK;
+}
+
+/*
+ *-----------------------------------------------------------------------------
+ *
* EncodingSystemObjCmd --
*
* This command retrieves or changes the system encoding
diff --git a/generic/tclDecls.h b/generic/tclDecls.h
index f219500..bdc094d 100644
--- a/generic/tclDecls.h
+++ b/generic/tclDecls.h
@@ -1766,13 +1766,17 @@ EXTERN const char * Tcl_UtfPrev(const char *src, const char *start);
/* 657 */
EXTERN int Tcl_UniCharIsUnicode(int ch);
/* 658 */
-EXTERN Tcl_Size Tcl_ExternalToUtfDStringEx(Tcl_Encoding encoding,
- const char *src, Tcl_Size srcLen, int flags,
- Tcl_DString *dsPtr);
+EXTERN int Tcl_ExternalToUtfDStringEx(Tcl_Interp *interp,
+ Tcl_Encoding encoding, const char *src,
+ Tcl_Size srcLen, int flags,
+ Tcl_DString *dsPtr,
+ Tcl_Size *errorLocationPtr);
/* 659 */
-EXTERN Tcl_Size Tcl_UtfToExternalDStringEx(Tcl_Encoding encoding,
- const char *src, Tcl_Size srcLen, int flags,
- Tcl_DString *dsPtr);
+EXTERN int Tcl_UtfToExternalDStringEx(Tcl_Interp *interp,
+ Tcl_Encoding encoding, const char *src,
+ Tcl_Size srcLen, int flags,
+ Tcl_DString *dsPtr,
+ Tcl_Size *errorLocationPtr);
/* 660 */
EXTERN int Tcl_AsyncMarkFromSignal(Tcl_AsyncHandler async,
int sigNumber);
@@ -2529,8 +2533,8 @@ typedef struct TclStubs {
const char * (*tcl_UtfNext) (const char *src); /* 655 */
const char * (*tcl_UtfPrev) (const char *src, const char *start); /* 656 */
int (*tcl_UniCharIsUnicode) (int ch); /* 657 */
- Tcl_Size (*tcl_ExternalToUtfDStringEx) (Tcl_Encoding encoding, const char *src, Tcl_Size srcLen, int flags, Tcl_DString *dsPtr); /* 658 */
- Tcl_Size (*tcl_UtfToExternalDStringEx) (Tcl_Encoding encoding, const char *src, Tcl_Size srcLen, int flags, Tcl_DString *dsPtr); /* 659 */
+ int (*tcl_ExternalToUtfDStringEx) (Tcl_Interp *interp, Tcl_Encoding encoding, const char *src, Tcl_Size srcLen, int flags, Tcl_DString *dsPtr, Tcl_Size *errorLocationPtr); /* 658 */
+ int (*tcl_UtfToExternalDStringEx) (Tcl_Interp *interp, Tcl_Encoding encoding, const char *src, Tcl_Size srcLen, int flags, Tcl_DString *dsPtr, Tcl_Size *errorLocationPtr); /* 659 */
int (*tcl_AsyncMarkFromSignal) (Tcl_AsyncHandler async, int sigNumber); /* 660 */
int (*tcl_ListObjGetElements) (Tcl_Interp *interp, Tcl_Obj *listPtr, size_t *objcPtr, Tcl_Obj ***objvPtr); /* 661 */
int (*tcl_ListObjLength) (Tcl_Interp *interp, Tcl_Obj *listPtr, size_t *lengthPtr); /* 662 */
@@ -3956,12 +3960,12 @@ extern const TclStubs *tclStubsPtr;
#undef Tcl_UtfToExternalDString
#define Tcl_UtfToExternalDString(encoding, src, len, ds) \
- (Tcl_UtfToExternalDStringEx((encoding), (src), (len), \
- TCL_ENCODING_NOCOMPLAIN, (ds)), Tcl_DStringValue(ds))
+ (Tcl_UtfToExternalDStringEx(NULL, (encoding), (src), (len), \
+ TCL_ENCODING_PROFILE_TCL8, (ds), NULL), Tcl_DStringValue(ds))
#undef Tcl_ExternalToUtfDString
#define Tcl_ExternalToUtfDString(encoding, src, len, ds) \
- (Tcl_ExternalToUtfDStringEx((encoding), (src), (len), \
- TCL_ENCODING_NOCOMPLAIN, (ds)), Tcl_DStringValue(ds))
+ (Tcl_ExternalToUtfDStringEx(NULL, (encoding), (src), (len), \
+ TCL_ENCODING_PROFILE_TCL8, (ds), NULL), Tcl_DStringValue(ds))
#if defined(USE_TCL_STUBS)
# if defined(_WIN32) && defined(_WIN64)
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c
index ce5626f..68f22b0 100644
--- a/generic/tclEncoding.c
+++ b/generic/tclEncoding.c
@@ -188,6 +188,32 @@ static Tcl_Encoding systemEncoding = NULL;
Tcl_Encoding tclIdentityEncoding = NULL;
/*
+ * Names of encoding profiles and corresponding integer values
+ */
+static struct TclEncodingProfiles {
+ const char *name;
+ int value;
+} encodingProfiles[] = {
+ {"tcl8", TCL_ENCODING_PROFILE_TCL8},
+ {"strict", TCL_ENCODING_PROFILE_STRICT},
+ {"replace", TCL_ENCODING_PROFILE_REPLACE},
+};
+#define PROFILE_STRICT(flags_) \
+ ((TCL_ENCODING_PROFILE_GET(flags_) == TCL_ENCODING_PROFILE_STRICT) \
+ || (TCL_ENCODING_PROFILE_GET(flags_) == 0 \
+ && TCL_ENCODING_PROFILE_DEFAULT == TCL_ENCODING_PROFILE_STRICT))
+
+#define PROFILE_REPLACE(flags_) \
+ ((TCL_ENCODING_PROFILE_GET(flags_) == TCL_ENCODING_PROFILE_REPLACE) \
+ || (TCL_ENCODING_PROFILE_GET(flags_) == 0 \
+ && TCL_ENCODING_PROFILE_DEFAULT == TCL_ENCODING_PROFILE_REPLACE))
+
+#define UNICODE_REPLACE_CHAR ((Tcl_UniChar)0xFFFD)
+#define SURROGATE(c_) (((c_) & ~0x7FF) == 0xD800)
+#define HIGH_SURROGATE(c_) (((c_) & ~0x3FF) == 0xD800)
+#define LOW_SURROGATE(c_) (((c_) & ~0x3FF) == 0xDC00)
+
+/*
* The following variable is used in the sparse matrix code for a
* TableEncoding to represent a page in the table that has no entries.
*/
@@ -230,6 +256,7 @@ static Tcl_EncodingConvertProc UtfToUtfProc;
static Tcl_EncodingConvertProc Iso88591FromUtfProc;
static Tcl_EncodingConvertProc Iso88591ToUtfProc;
+
/*
* A Tcl_ObjType for holding a cached Tcl_Encoding in the twoPtrValue.ptr1 field
* of the internalrep. This should help the lifetime of encodings be more useful.
@@ -1114,7 +1141,8 @@ Tcl_ExternalToUtfDString(
Tcl_DString *dstPtr) /* Uninitialized or free DString in which the
* converted string is stored. */
{
- Tcl_ExternalToUtfDStringEx(encoding, src, srcLen, TCL_ENCODING_NOCOMPLAIN, dstPtr);
+ Tcl_ExternalToUtfDStringEx(
+ NULL, encoding, src, srcLen, TCL_ENCODING_PROFILE_TCL8, dstPtr, NULL);
return Tcl_DStringValue(dstPtr);
}
@@ -1128,34 +1156,55 @@ Tcl_ExternalToUtfDString(
* The parameter flags controls the behavior, if any of the bytes in
* the source buffer are invalid or cannot be represented in utf-8.
* Possible flags values:
- * TCL_ENCODING_NOCOMPLAIN: replace invalid characters/bytes by a default
- * fallback character. Always return -1 (Default in Tcl 8.7).
- * TCL_ENCODING_MODIFIED: convert NULL bytes to \xC0\x80 in stead of 0x00.
- * Only valid for "utf-8" and "cesu-8". This flag may be used together
- * with the other flags.
+ * target encoding. It should be composed by OR-ing the following:
+ * - *At most one* of TCL_ENCODING_PROFILE{DEFAULT,TCL8,STRICT}
+ * - TCL_ENCODING_STOPONERROR: Backward compatibility. Sets the profile
+ * to TCL_ENCODING_PROFILE_STRICT overriding any specified profile flags
+ * - TCL_ENCODING_MODIFIED: enable Tcl internal conversion mapping \xC0\x80
+ * to 0x00. Only valid for "utf-8" and "cesu-8".
+ * Any other flag bits will cause an error to be returned (for future
+ * compatibility)
*
* Results:
- * The converted bytes are stored in the DString, which is then NULL
- * terminated in an encoding-specific manner. The return value is
- * the error position in the source string or -1 if no conversion error
- * is reported.
- *
+ * The return value is one of
+ * TCL_OK: success. Converted string in *dstPtr
+ * TCL_ERROR: error in passed parameters. Error message in interp
+ * TCL_CONVERT_MULTIBYTE: source ends in truncated multibyte sequence
+ * TCL_CONVERT_SYNTAX: source is not conformant to encoding definition
+ * TCL_CONVERT_UNKNOWN: source contained a character that could not
+ * be represented in target encoding.
+ *
* Side effects:
- * None.
+ *
+ * TCL_OK: The converted bytes are stored in the DString and NUL
+ * terminated in an encoding-specific manner.
+ * TCL_ERROR: an error, message is stored in the interp if not NULL.
+ * TCL_CONVERT_*: if errorLocPtr is NULL, an error message is stored
+ * in the interpreter (if not NULL). If errorLocPtr is not NULL,
+ * no error message is stored as it is expected the caller is
+ * interested in whatever is decoded so far and not treating this
+ * as an error condition.
+ *
+ * In addition, *dstPtr is always initialized and must be cleared
+ * by the caller irrespective of the return code.
*
*-------------------------------------------------------------------------
*/
-Tcl_Size
+int
Tcl_ExternalToUtfDStringEx(
+ Tcl_Interp *interp, /* For error messages. May be NULL. */
Tcl_Encoding encoding, /* The encoding for the source string, or NULL
* for the default system encoding. */
const char *src, /* Source string in specified encoding. */
Tcl_Size srcLen, /* Source string length in bytes, or < 0 for
* encoding-specific string length. */
int flags, /* Conversion control flags. */
- Tcl_DString *dstPtr) /* Uninitialized or free DString in which the
+ Tcl_DString *dstPtr, /* Uninitialized or free DString in which the
* converted string is stored. */
+ Tcl_Size *errorLocPtr) /* Where to store the error location
+ (or TCL_INDEX_NONE if no error). May
+ be NULL. */
{
char *dst;
Tcl_EncodingState state;
@@ -1164,7 +1213,18 @@ Tcl_ExternalToUtfDStringEx(
Tcl_Size dstLen;
const char *srcStart = src;
+ /* DO FIRST - Must always be initialized before returning */
Tcl_DStringInit(dstPtr);
+
+ if (flags & (TCL_ENCODING_START|TCL_ENCODING_END)) {
+ /* TODO - what other flags are illegal? - See TIP 656 */
+ Tcl_SetResult(interp,
+ "Parameter error: TCL_ENCODING_{START,STOP} bits set in flags.",
+ TCL_STATIC);
+ Tcl_SetErrorCode(interp, "TCL", "ENCODING", "ILLEGALFLAGS", NULL);
+ return TCL_ERROR;
+ }
+
dst = Tcl_DStringValue(dstPtr);
dstLen = dstPtr->spaceAvl - 1;
@@ -1179,6 +1239,7 @@ Tcl_ExternalToUtfDStringEx(
srcLen = encodingPtr->lengthProc(src);
}
+ flags = TclEncodingExternalFlagsToInternal(flags);
flags |= TCL_ENCODING_START | TCL_ENCODING_END;
if (encodingPtr->toUtfProc == UtfToUtfProc) {
flags |= ENCODING_INPUT;
@@ -1189,19 +1250,45 @@ Tcl_ExternalToUtfDStringEx(
flags, &state, dst, dstLen, &srcRead, &dstWrote, &dstChars);
soFar = dst + dstWrote - Tcl_DStringValue(dstPtr);
- src += srcRead;
- if (result != TCL_CONVERT_NOSPACE) {
- Tcl_DStringSetLength(dstPtr, soFar);
- return (result == TCL_OK) ? TCL_INDEX_NONE : (Tcl_Size)(src - srcStart);
- }
- flags &= ~TCL_ENCODING_START;
- srcLen -= srcRead;
- if (Tcl_DStringLength(dstPtr) == 0) {
- Tcl_DStringSetLength(dstPtr, dstLen);
- }
- Tcl_DStringSetLength(dstPtr, 2 * Tcl_DStringLength(dstPtr) + 1);
- dst = Tcl_DStringValue(dstPtr) + soFar;
- dstLen = Tcl_DStringLength(dstPtr) - soFar - 1;
+ src += srcRead;
+ if (result != TCL_CONVERT_NOSPACE) {
+ Tcl_Size nBytesProcessed = (src - srcStart);
+
+ Tcl_DStringSetLength(dstPtr, soFar);
+ if (errorLocPtr) {
+ /*
+ * Do not write error message into interpreter if caller
+ * wants to know error location.
+ */
+ *errorLocPtr = result == TCL_OK ? TCL_INDEX_NONE : nBytesProcessed;
+ }
+ else {
+ /* Caller wants error message on failure */
+ if (result != TCL_OK && interp != NULL) {
+ char buf[TCL_INTEGER_SPACE];
+ sprintf(buf, "%" TCL_Z_MODIFIER "u", nBytesProcessed);
+ Tcl_SetObjResult(
+ interp,
+ Tcl_ObjPrintf("unexpected byte sequence starting at index %"
+ TCL_Z_MODIFIER "u: '\\x%X'",
+ nBytesProcessed,
+ UCHAR(srcStart[nBytesProcessed])));
+ Tcl_SetErrorCode(
+ interp, "TCL", "ENCODING", "ILLEGALSEQUENCE", buf, NULL);
+ }
+ }
+ return result;
+ }
+
+ /* Expand space and continue */
+ flags &= ~TCL_ENCODING_START;
+ srcLen -= srcRead;
+ if (Tcl_DStringLength(dstPtr) == 0) {
+ Tcl_DStringSetLength(dstPtr, dstLen);
+ }
+ Tcl_DStringSetLength(dstPtr, 2 * Tcl_DStringLength(dstPtr) + 1);
+ dst = Tcl_DStringValue(dstPtr) + soFar;
+ dstLen = Tcl_DStringLength(dstPtr) - soFar - 1;
}
}
@@ -1351,7 +1438,8 @@ Tcl_UtfToExternalDString(
Tcl_DString *dstPtr) /* Uninitialized or free DString in which the
* converted string is stored. */
{
- Tcl_UtfToExternalDStringEx(encoding, src, srcLen, TCL_ENCODING_NOCOMPLAIN, dstPtr);
+ Tcl_UtfToExternalDStringEx(
+ NULL, encoding, src, srcLen, TCL_ENCODING_PROFILE_DEFAULT, dstPtr, NULL);
return Tcl_DStringValue(dstPtr);
}
@@ -1364,36 +1452,53 @@ Tcl_UtfToExternalDString(
* Convert a source buffer from UTF-8 to the specified encoding.
* The parameter flags controls the behavior, if any of the bytes in
* the source buffer are invalid or cannot be represented in the
- * target encoding.
- * Possible flags values:
- * TCL_ENCODING_NOCOMPLAIN: replace invalid characters/bytes by a default
- * fallback character. Always return -1 (Default in Tcl 8.7).
- * TCL_ENCODING_MODIFIED: convert NULL bytes to \xC0\x80 in stead of 0x00.
- * Only valid for "utf-8" and "cesu-8". This flag may be used together
- * with the other flags.
+ * target encoding. It should be composed by OR-ing the following:
+ * - *At most one* of TCL_ENCODING_PROFILE{DEFAULT,TCL8,STRICT}
+ * - TCL_ENCODING_STOPONERROR: Backward compatibility. Sets the profile
+ * to TCL_ENCODING_PROFILE_STRICT overriding any specified profile flags
+ * - TCL_ENCODING_MODIFIED: convert NULL bytes to \xC0\x80 instead
+ * of 0x00. Only valid for "utf-8" and "cesu-8".
*
* Results:
- * The converted bytes are stored in the DString, which is then NULL
- * terminated in an encoding-specific manner. The return value is
- * the error position in the source string or -1 if no conversion error
- * is reported.
+ * The return value is one of
+ * TCL_OK: success. Converted string in *dstPtr
+ * TCL_ERROR: error in passed parameters. Error message in interp
+ * TCL_CONVERT_MULTIBYTE: source ends in truncated multibyte sequence
+ * TCL_CONVERT_SYNTAX: source is not conformant to encoding definition
+ * TCL_CONVERT_UNKNOWN: source contained a character that could not
+ * be represented in target encoding.
*
* Side effects:
- * None.
+ *
+ * TCL_OK: The converted bytes are stored in the DString and NUL
+ * terminated in an encoding-specific manner
+ * TCL_ERROR: an error, message is stored in the interp if not NULL.
+ * TCL_CONVERT_*: if errorLocPtr is NULL, an error message is stored
+ * in the interpreter (if not NULL). If errorLocPtr is not NULL,
+ * no error message is stored as it is expected the caller is
+ * interested in whatever is decoded so far and not treating this
+ * as an error condition.
+ *
+ * In addition, *dstPtr is always initialized and must be cleared
+ * by the caller irrespective of the return code.
*
*-------------------------------------------------------------------------
*/
-Tcl_Size
+int
Tcl_UtfToExternalDStringEx(
+ Tcl_Interp *interp, /* For error messages. May be NULL. */
Tcl_Encoding encoding, /* The encoding for the converted string, or
* NULL for the default system encoding. */
const char *src, /* Source string in UTF-8. */
Tcl_Size srcLen, /* Source string length in bytes, or < 0 for
* strlen(). */
int flags, /* Conversion control flags. */
- Tcl_DString *dstPtr) /* Uninitialized or free DString in which the
+ Tcl_DString *dstPtr, /* Uninitialized or free DString in which the
* converted string is stored. */
+ Tcl_Size *errorLocPtr) /* Where to store the error location
+ (or TCL_INDEX_NONE if no error). May
+ be NULL. */
{
char *dst;
Tcl_EncodingState state;
@@ -1402,7 +1507,18 @@ Tcl_UtfToExternalDStringEx(
const char *srcStart = src;
Tcl_Size dstLen;
+ /* DO FIRST - must always be initialized on return */
Tcl_DStringInit(dstPtr);
+
+ if (flags & (TCL_ENCODING_START|TCL_ENCODING_END)) {
+ /* TODO - what other flags are illegal? - See TIP 656 */
+ Tcl_SetResult(interp,
+ "Parameter error: TCL_ENCODING_{START,STOP} bits set in flags.",
+ TCL_STATIC);
+ Tcl_SetErrorCode(interp, "TCL", "ENCODING", "ILLEGALFLAGS", NULL);
+ return TCL_ERROR;
+ }
+
dst = Tcl_DStringValue(dstPtr);
dstLen = dstPtr->spaceAvl - 1;
@@ -1416,20 +1532,49 @@ Tcl_UtfToExternalDStringEx(
} else if (srcLen == TCL_INDEX_NONE) {
srcLen = strlen(src);
}
+
+ flags = TclEncodingExternalFlagsToInternal(flags);
flags |= TCL_ENCODING_START | TCL_ENCODING_END;
while (1) {
result = encodingPtr->fromUtfProc(encodingPtr->clientData, src,
- srcLen, flags, &state, dst, dstLen,
- &srcRead, &dstWrote, &dstChars);
+ srcLen, flags, &state, dst, dstLen,
+ &srcRead, &dstWrote, &dstChars);
soFar = dst + dstWrote - Tcl_DStringValue(dstPtr);
src += srcRead;
if (result != TCL_CONVERT_NOSPACE) {
+ Tcl_Size nBytesProcessed = (src - srcStart);
int i = soFar + encodingPtr->nullSize - 1;
while (i >= soFar) {
Tcl_DStringSetLength(dstPtr, i--);
}
- return (result == TCL_OK) ? TCL_INDEX_NONE : (Tcl_Size)(src - srcStart);
+ if (errorLocPtr) {
+ /*
+ * Do not write error message into interpreter if caller
+ * wants to know error location.
+ */
+ *errorLocPtr = result == TCL_OK ? TCL_INDEX_NONE : nBytesProcessed;
+ }
+ else {
+ /* Caller wants error message on failure */
+ if (result != TCL_OK && interp != NULL) {
+ Tcl_Size pos = Tcl_NumUtfChars(srcStart, nBytesProcessed);
+ int ucs4;
+ char buf[TCL_INTEGER_SPACE];
+ TclUtfToUCS4(&srcStart[nBytesProcessed], &ucs4);
+ sprintf(buf, "%" TCL_Z_MODIFIER "u", nBytesProcessed);
+ Tcl_SetObjResult(
+ interp,
+ Tcl_ObjPrintf(
+ "unexpected character at index %" TCL_Z_MODIFIER
+ "u: 'U+%06X'",
+ pos,
+ ucs4));
+ Tcl_SetErrorCode(interp, "TCL", "ENCODING", "ILLEGALSEQUENCE",
+ buf, NULL);
+ }
+ }
+ return result;
}
flags &= ~TCL_ENCODING_START;
@@ -2257,14 +2402,12 @@ BinaryProc(
*-------------------------------------------------------------------------
*/
-#define STOPONERROR (!(flags & TCL_ENCODING_NOCOMPLAIN))
-
static int
UtfToUtfProc(
void *clientData, /* additional flags, e.g. TCL_ENCODING_MODIFIED */
const char *src, /* Source string in UTF-8. */
int srcLen, /* Source string length in bytes. */
- int flags, /* Conversion control flags. */
+ int flags, /* TCL_ENCODING_* conversion control flags. */
TCL_UNUSED(Tcl_EncodingState *),
char *dst, /* Output buffer in which converted string is
* stored. */
@@ -2286,6 +2429,7 @@ UtfToUtfProc(
const char *dstStart, *dstEnd;
int result, numChars, charLimit = INT_MAX;
int ch;
+ int profile;
result = TCL_OK;
@@ -2303,7 +2447,9 @@ UtfToUtfProc(
flags |= PTR2INT(clientData);
dstEnd = dst + dstLen - ((flags & ENCODING_UTF) ? TCL_UTF_MAX : 6);
+ profile = TCL_ENCODING_PROFILE_GET(flags);
for (numChars = 0; src < srcEnd && numChars <= charLimit; numChars++) {
+
if ((src > srcClose) && (!Tcl_UtfCharComplete(src, srcEnd - src))) {
/*
* If there is more string to follow, this will ensure that the
@@ -2324,25 +2470,34 @@ UtfToUtfProc(
*/
*dst++ = *src++;
- } else if ((UCHAR(*src) == 0xC0) && (src + 1 < srcEnd)
- && (UCHAR(src[1]) == 0x80) && !(flags & TCL_ENCODING_MODIFIED) && (!(flags & ENCODING_INPUT)
- || ((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT)
- || (flags & ENCODING_FAILINDEX))) {
- /*
- * If in input mode, and -strict or -failindex is specified: This is an error.
- */
- if ((STOPONERROR) && (flags & ENCODING_INPUT)) {
- result = TCL_CONVERT_SYNTAX;
- break;
+ }
+ else if ((UCHAR(*src) == 0xC0) && (src + 1 < srcEnd) &&
+ (UCHAR(src[1]) == 0x80) && !(flags & TCL_ENCODING_MODIFIED) &&
+ (!(flags & ENCODING_INPUT) || PROFILE_STRICT(profile) ||
+ PROFILE_REPLACE(profile))) {
+ /* Special sequence \xC0\x80 */
+ if ((PROFILE_STRICT(profile) || PROFILE_REPLACE(profile)) && (flags & ENCODING_INPUT)) {
+ if (PROFILE_REPLACE(profile)) {
+ dst += Tcl_UniCharToUtf(UNICODE_REPLACE_CHAR, dst);
+ src += 2;
+ } else {
+ /* PROFILE_STRICT */
+ result = TCL_CONVERT_SYNTAX;
+ break;
+ }
+ } else {
+ /*
+ * Convert 0xC080 to real nulls when we are in output mode,
+ * irrespective of the profile.
+ */
+ *dst++ = 0;
+ src += 2;
}
+ }
+ else if (!Tcl_UtfCharComplete(src, srcEnd - src)) {
/*
- * Convert 0xC080 to real nulls when we are in output mode, with or without '-strict'.
- */
- *dst++ = 0;
- src += 2;
- } else if (!Tcl_UtfCharComplete(src, srcEnd - src)) {
- /*
+ * Incomplete byte sequence.
* Always check before using TclUtfToUCS4. Not doing can so
* cause it run beyond the end of the buffer! If we happen such an
* incomplete char its bytes are made to represent themselves
@@ -2350,32 +2505,45 @@ UtfToUtfProc(
*/
if (flags & ENCODING_INPUT) {
- if ((STOPONERROR) && (flags & TCL_ENCODING_CHAR_LIMIT)) {
- result = TCL_CONVERT_MULTIBYTE;
+ /* Incomplete bytes for modified UTF-8 target */
+ if (PROFILE_STRICT(profile)) {
+ result = (flags & TCL_ENCODING_CHAR_LIMIT)
+ ? TCL_CONVERT_MULTIBYTE
+ : TCL_CONVERT_SYNTAX;
break;
}
- if (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT) || (flags & ENCODING_FAILINDEX)) {
- result = TCL_CONVERT_SYNTAX;
- break;
- }
- }
- char chbuf[2];
- chbuf[0] = UCHAR(*src++); chbuf[1] = 0;
- TclUtfToUCS4(chbuf, &ch);
+ }
+ if (PROFILE_REPLACE(profile)) {
+ ch = UNICODE_REPLACE_CHAR;
+ ++src;
+ } else {
+ /* TCL_ENCODING_PROFILE_TCL8 */
+ char chbuf[2];
+ chbuf[0] = UCHAR(*src++); chbuf[1] = 0;
+ TclUtfToUCS4(chbuf, &ch);
+ }
dst += Tcl_UniCharToUtf(ch, dst);
- } else {
+ }
+ else {
+ int isInvalid = 0;
size_t len = TclUtfToUCS4(src, &ch);
if (flags & ENCODING_INPUT) {
- if ((len < 2) && (ch != 0)
- && (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT) || (flags & ENCODING_FAILINDEX))) {
- goto utf8Syntax;
- } else if ((ch > 0xFFFF) && !(flags & ENCODING_UTF)
- && (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT) || (flags & ENCODING_FAILINDEX))) {
- utf8Syntax:
- result = TCL_CONVERT_SYNTAX;
- break;
+ if ((len < 2) && (ch != 0)) {
+ isInvalid = 1;
+ } else if ((ch > 0xFFFF) && !(flags & ENCODING_UTF)) {
+ isInvalid = 1;
+ }
+ if (isInvalid) {
+ if (PROFILE_STRICT(profile)) {
+ result = TCL_CONVERT_SYNTAX;
+ break;
+ }
+ else if (PROFILE_REPLACE(profile)) {
+ ch = UNICODE_REPLACE_CHAR;
+ }
}
}
+
const char *saveSrc = src;
src += len;
if (!(flags & ENCODING_UTF) && !(flags & ENCODING_INPUT) && (ch > 0x3FF)) {
@@ -2399,34 +2567,42 @@ UtfToUtfProc(
/*
* A surrogate character is detected, handle especially.
*/
-
- if (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT) && (flags & ENCODING_UTF)) {
+ if (PROFILE_STRICT(profile) && (flags & ENCODING_UTF)) {
result = TCL_CONVERT_UNKNOWN;
src = saveSrc;
break;
}
- int low = ch;
- len = (src <= srcEnd-3) ? TclUtfToUCS4(src, &low) : 0;
-
- if (((low & ~0x3FF) != 0xDC00) || (ch & 0x400)) {
-
- if (STOPONERROR) {
- result = TCL_CONVERT_UNKNOWN;
- src = saveSrc;
- break;
+ if (PROFILE_REPLACE(profile)) {
+ /* TODO - is this right for cesu8 or should we fall through below? */
+ ch = UNICODE_REPLACE_CHAR;
+ }
+ else {
+ int low = ch;
+ len = (src <= srcEnd - 3) ? TclUtfToUCS4(src, &low) : 0;
+
+ if ((!LOW_SURROGATE(low)) || (ch & 0x400)) {
+
+ if (PROFILE_STRICT(profile)) {
+ result = TCL_CONVERT_UNKNOWN;
+ src = saveSrc;
+ break;
+ }
+ goto cesu8;
}
- goto cesu8;
+ src += len;
+ dst += Tcl_UniCharToUtf(ch, dst);
+ ch = low;
}
- src += len;
- dst += Tcl_UniCharToUtf(ch, dst);
- ch = low;
#endif
- } else if (STOPONERROR && !(flags & ENCODING_INPUT) && (((ch & ~0x7FF) == 0xD800))) {
+ } else if (PROFILE_STRICT(profile) &&
+ (!(flags & ENCODING_INPUT)) &&
+ SURROGATE(ch)) {
result = TCL_CONVERT_UNKNOWN;
src = saveSrc;
break;
- } else if (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT)
- && (flags & ENCODING_INPUT) && ((ch & ~0x7FF) == 0xD800)) {
+ } else if (PROFILE_STRICT(profile) &&
+ (flags & ENCODING_INPUT) &&
+ SURROGATE(ch)) {
result = TCL_CONVERT_SYNTAX;
src = saveSrc;
break;
@@ -2494,8 +2670,8 @@ Utf32ToUtfProc(
/*
* Check alignment with utf-32 (4 == sizeof(UTF-32))
*/
-
if (bytesLeft != 0) {
+ /* We have a truncated code unit */
result = TCL_CONVERT_MULTIBYTE;
srcLen -= bytesLeft;
}
@@ -2517,17 +2693,14 @@ Utf32ToUtfProc(
} else {
ch = (src[0] & 0xFF) << 24 | (src[1] & 0xFF) << 16 | (src[2] & 0xFF) << 8 | (src[3] & 0xFF);
}
- if ((unsigned)ch > 0x10FFFF) {
- if (STOPONERROR) {
+
+ if ((unsigned)ch > 0x10FFFF || SURROGATE(ch)) {
+ if (PROFILE_STRICT(flags)) {
result = TCL_CONVERT_SYNTAX;
break;
}
- ch = 0xFFFD;
- } else if (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT)
- && ((ch & ~0x7FF) == 0xD800)) {
- if (STOPONERROR) {
- result = TCL_CONVERT_SYNTAX;
- break;
+ if (PROFILE_REPLACE(flags)) {
+ ch = UNICODE_REPLACE_CHAR;
}
}
@@ -2541,25 +2714,31 @@ Utf32ToUtfProc(
} else {
dst += Tcl_UniCharToUtf(ch, dst);
}
- src += sizeof(unsigned int);
+ src += 4;
}
+
+
+ /*
+ * If we had a truncated code unit at the end AND this is the last
+ * fragment AND profile is not "strict", stick FFFD in its place.
+ */
if ((flags & TCL_ENCODING_END) && (result == TCL_CONVERT_MULTIBYTE)) {
- /* We have a single byte left-over at the end */
if (dst > dstEnd) {
result = TCL_CONVERT_NOSPACE;
} else {
- /* destination is not full, so we really are at the end now */
- if ((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT) {
- result = TCL_CONVERT_SYNTAX;
- } else {
- result = TCL_OK;
- dst += Tcl_UniCharToUtf(0xFFFD, dst);
- numChars++;
- src += bytesLeft;
- }
- }
+ if (PROFILE_STRICT(flags)) {
+ result = TCL_CONVERT_SYNTAX;
+ } else {
+ /* PROFILE_REPLACE or PROFILE_TCL8 */
+ result = TCL_OK;
+ dst += Tcl_UniCharToUtf(UNICODE_REPLACE_CHAR, dst);
+ numChars++;
+ src += bytesLeft; /* Go past truncated code unit */
+ }
+ }
}
+
*srcReadPtr = src - srcStart;
*dstWrotePtr = dst - dstStart;
*dstCharsPtr = numChars;
@@ -2636,11 +2815,14 @@ UtfToUtf32Proc(
break;
}
len = TclUtfToUCS4(src, &ch);
- if ((ch & ~0x7FF) == 0xD800) {
- if (STOPONERROR) {
+ if (SURROGATE(ch)) {
+ if (PROFILE_STRICT(flags)) {
result = TCL_CONVERT_UNKNOWN;
break;
}
+ if (PROFILE_REPLACE(flags)) {
+ ch = UNICODE_REPLACE_CHAR;
+ }
}
src += len;
if (flags & TCL_ENCODING_LE) {
@@ -2772,22 +2954,27 @@ Utf16ToUtfProc(
/* Bug [10c2c17c32]. If Hi surrogate, finish 3-byte UTF-8 */
dst += Tcl_UniCharToUtf(-1, dst);
}
+
+ /*
+ * If we had a truncated code unit at the end AND this is the last
+ * fragment AND profile is not "strict", stick FFFD in its place.
+ */
if ((flags & TCL_ENCODING_END) && (result == TCL_CONVERT_MULTIBYTE)) {
- /* We have a single byte left-over at the end */
if (dst > dstEnd) {
result = TCL_CONVERT_NOSPACE;
} else {
- /* destination is not full, so we really are at the end now */
- if (((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT)) {
- result = TCL_CONVERT_SYNTAX;
- } else {
- result = TCL_OK;
- dst += Tcl_UniCharToUtf(0xFFFD, dst);
- numChars++;
- src++;
- }
- }
+ if (PROFILE_STRICT(flags)) {
+ result = TCL_CONVERT_SYNTAX;
+ } else {
+ /* PROFILE_REPLACE or PROFILE_TCL8 */
+ result = TCL_OK;
+ dst += Tcl_UniCharToUtf(UNICODE_REPLACE_CHAR, dst);
+ numChars++;
+ src++; /* Go past truncated code unit */
+ }
+ }
}
+
*srcReadPtr = src - srcStart;
*dstWrotePtr = dst - dstStart;
*dstCharsPtr = numChars;
@@ -2864,11 +3051,14 @@ UtfToUtf16Proc(
break;
}
len = TclUtfToUCS4(src, &ch);
- if ((ch & ~0x7FF) == 0xD800) {
- if (STOPONERROR) {
+ if (SURROGATE(ch)) {
+ if (PROFILE_STRICT(flags)) {
result = TCL_CONVERT_UNKNOWN;
break;
}
+ if (PROFILE_REPLACE(flags)) {
+ ch = UNICODE_REPLACE_CHAR;
+ }
}
src += len;
if (flags & TCL_ENCODING_LE) {
@@ -2971,25 +3161,25 @@ UtfToUcs2Proc(
#if TCL_UTF_MAX < 4
len = TclUtfToUniChar(src, &ch);
if ((ch >= 0xD800) && (len < 3)) {
- if (STOPONERROR) {
- result = TCL_CONVERT_UNKNOWN;
- break;
+ if (PROFILE_STRICT(flags)) {
+ result = TCL_CONVERT_UNKNOWN;
+ break;
}
src += len;
src += TclUtfToUniChar(src, &ch);
- ch = 0xFFFD;
+ ch = UNICODE_REPLACE_CHAR;
}
#else
len = TclUtfToUniChar(src, &ch);
if (ch > 0xFFFF) {
- if (STOPONERROR) {
- result = TCL_CONVERT_UNKNOWN;
- break;
+ if (PROFILE_STRICT(flags)) {
+ result = TCL_CONVERT_UNKNOWN;
+ break;
}
- ch = 0xFFFD;
+ ch = UNICODE_REPLACE_CHAR;
}
#endif
- if (STOPONERROR && ((ch & ~0x7FF) == 0xD800)) {
+ if (PROFILE_STRICT(flags) && ((ch & ~0x7FF) == 0xD800)) {
result = TCL_CONVERT_SYNTAX;
break;
}
@@ -3087,24 +3277,35 @@ TableToUtfProc(
if (prefixBytes[byte]) {
src++;
if (src >= srcEnd) {
+ /*
+ * TODO - this is broken. For consistency with other
+ * decoders, an error should be raised only if strict.
+ * However, doing that check cause a whole bunch of test
+ * failures. Need to verify if those tests are in fact
+ * correct.
+ */
src--;
result = TCL_CONVERT_MULTIBYTE;
break;
}
+ ch = toUnicode[byte][*((unsigned char *)src)];
ch = toUnicode[byte][*((unsigned char *) src)];
} else {
ch = pageZero[byte];
}
if ((ch == 0) && (byte != 0)) {
- if ((flags & ENCODING_FAILINDEX)
- || ((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT)) {
+ if (PROFILE_STRICT(flags)) {
result = TCL_CONVERT_SYNTAX;
break;
}
if (prefixBytes[byte]) {
src--;
}
- ch = (Tcl_UniChar) byte;
+ if (PROFILE_REPLACE(flags)) {
+ ch = UNICODE_REPLACE_CHAR;
+ } else {
+ ch = (Tcl_UniChar)byte;
+ }
}
/*
@@ -3213,11 +3414,11 @@ TableFromUtfProc(
word = fromUnicode[(ch >> 8)][ch & 0xFF];
if ((word == 0) && (ch != 0)) {
- if (STOPONERROR) {
+ if (PROFILE_STRICT(flags)) {
result = TCL_CONVERT_UNKNOWN;
break;
}
- word = dataPtr->fallback;
+ word = dataPtr->fallback; /* Both profiles REPLACE and TCL8 */
}
if (prefixBytes[(word >> 8)] != 0) {
if (dst + 1 > dstEnd) {
@@ -3401,7 +3602,7 @@ Iso88591FromUtfProc(
|| ((ch >= 0xD800) && (len < 3))
#endif
) {
- if (STOPONERROR) {
+ if (PROFILE_STRICT(flags)) {
result = TCL_CONVERT_UNKNOWN;
break;
}
@@ -3414,7 +3615,7 @@ Iso88591FromUtfProc(
* Plunge on, using '?' as a fallback character.
*/
- ch = (Tcl_UniChar) '?';
+ ch = (Tcl_UniChar) '?'; /* Profiles TCL8 and REPLACE */
}
if (dst > dstEnd) {
@@ -3628,9 +3829,10 @@ EscapeToUtfProc(
if ((checked == dataPtr->numSubTables + 2)
|| (flags & TCL_ENCODING_END)) {
- if (!STOPONERROR) {
+ if (!PROFILE_STRICT(flags)) {
/*
- * Skip the unknown escape sequence.
+ * Skip the unknown escape sequence. TODO - bug?
+ * May be replace with UNICODE_REPLACE_CHAR?
*/
src += longest;
@@ -3803,7 +4005,7 @@ EscapeFromUtfProc(
if (word == 0) {
state = oldState;
- if (STOPONERROR) {
+ if (PROFILE_STRICT(flags)) {
result = TCL_CONVERT_UNKNOWN;
break;
}
@@ -4097,6 +4299,158 @@ InitializeEncodingSearchPath(
}
/*
+ *------------------------------------------------------------------------
+ *
+ * TclEncodingProfileParseName --
+ *
+ * Maps an encoding profile name to its integer equivalent.
+ *
+ * Results:
+ * TCL_OK on success or TCL_ERROR on failure.
+ *
+ * Side effects:
+ * Returns the profile enum value in *profilePtr
+ *
+ *------------------------------------------------------------------------
+ */
+int
+TclEncodingProfileNameToId(
+ Tcl_Interp *interp, /* For error messages. May be NULL */
+ const char *profileName, /* Name of profile */
+ int *profilePtr) /* Output */
+{
+ size_t i;
+
+ for (i = 0; i < sizeof(encodingProfiles) / sizeof(encodingProfiles[0]); ++i) {
+ if (!strcmp(profileName, encodingProfiles[i].name)) {
+ *profilePtr = encodingProfiles[i].value;
+ return TCL_OK;
+ }
+ }
+ if (interp) {
+ Tcl_SetObjResult(
+ interp,
+ Tcl_ObjPrintf(
+ "bad profile \"%s\". Must be \"tcl8\" or \"strict\".",
+ profileName));
+ Tcl_SetErrorCode(
+ interp, "TCL", "ENCODING", "PROFILE", profileName, NULL);
+ }
+ return TCL_ERROR;
+}
+
+/*
+ *------------------------------------------------------------------------
+ *
+ * TclEncodingProfileValueToName --
+ *
+ * Maps an encoding profile value to its name.
+ *
+ * Results:
+ * Pointer to the name or NULL on failure. Caller must not make
+ * not modify the string and must make a copy to hold on to it.
+ *
+ * Side effects:
+ * None.
+ *------------------------------------------------------------------------
+ */
+const char *
+TclEncodingProfileIdToName(
+ Tcl_Interp *interp, /* For error messages. May be NULL */
+ int profileValue) /* Profile #define value */
+{
+ size_t i;
+
+ for (i = 0; i < sizeof(encodingProfiles) / sizeof(encodingProfiles[0]); ++i) {
+ if (profileValue == encodingProfiles[i].value) {
+ return encodingProfiles[i].name;
+ }
+ }
+ if (interp) {
+ Tcl_SetObjResult(
+ interp,
+ Tcl_ObjPrintf(
+ "Internal error. Bad profile id \"%d\".",
+ profileValue));
+ Tcl_SetErrorCode(
+ interp, "TCL", "ENCODING", "PROFILEID", NULL);
+ }
+ return NULL;
+}
+
+/*
+ *------------------------------------------------------------------------
+ *
+ * TclEncodingExternalFlagsToInternal --
+ *
+ * Maps the flags supported in the encoding C API's to internal flags.
+ *
+ * For backward compatibility reasons, TCL_ENCODING_STOPONERROR is
+ * is mapped to the TCL_ENCODING_PROFILE_STRICT overwriting any profile
+ * specified.
+ *
+ * If no profile or an invalid profile is specified, it is set to
+ * the default.
+ *
+ * Results:
+ * Internal encoding flag mask.
+ *
+ * Side effects:
+ * None.
+ *
+ *------------------------------------------------------------------------
+ */
+int TclEncodingExternalFlagsToInternal(int flags)
+{
+ if (flags & TCL_ENCODING_STOPONERROR) {
+ TCL_ENCODING_PROFILE_SET(flags, TCL_ENCODING_PROFILE_STRICT);
+ }
+ else {
+ int profile = TCL_ENCODING_PROFILE_GET(flags);
+ switch (profile) {
+ case TCL_ENCODING_PROFILE_TCL8:
+ case TCL_ENCODING_PROFILE_STRICT:
+ case TCL_ENCODING_PROFILE_REPLACE:
+ break;
+ case 0: /* Unspecified by caller */
+ default:
+ TCL_ENCODING_PROFILE_SET(flags, TCL_ENCODING_PROFILE_DEFAULT);
+ break;
+ }
+ }
+ return flags;
+}
+
+/*
+ *------------------------------------------------------------------------
+ *
+ * TclGetEncodingProfiles --
+ *
+ * Get the list of supported encoding profiles.
+ *
+ * Results:
+ * None.
+ *
+ * Side effects:
+ * The list of profile names is stored in the interpreter result.
+ *
+ *------------------------------------------------------------------------
+ */
+void
+TclGetEncodingProfiles(Tcl_Interp *interp)
+{
+ int i, n;
+ Tcl_Obj *objPtr;
+ n = sizeof(encodingProfiles) / sizeof(encodingProfiles[0]);
+ objPtr = Tcl_NewListObj(n, NULL);
+ for (i = 0; i < n; ++i) {
+ Tcl_ListObjAppendElement(
+ interp, objPtr, Tcl_NewStringObj(encodingProfiles[i].name, -1));
+ }
+ Tcl_SetObjResult(interp, objPtr);
+}
+
+/*
* Local Variables:
* mode: c
* c-basic-offset: 4
diff --git a/generic/tclIO.c b/generic/tclIO.c
index 26d0011..6d6a935 100644
--- a/generic/tclIO.c
+++ b/generic/tclIO.c
@@ -1675,8 +1675,12 @@ Tcl_CreateChannel(
}
statePtr->inputEncodingState = NULL;
statePtr->inputEncodingFlags = TCL_ENCODING_START;
+ TCL_ENCODING_PROFILE_SET(statePtr->inputEncodingFlags,
+ TCL_ENCODING_PROFILE_DEFAULT);
statePtr->outputEncodingState = NULL;
statePtr->outputEncodingFlags = TCL_ENCODING_START;
+ TCL_ENCODING_PROFILE_SET(statePtr->outputEncodingFlags,
+ TCL_ENCODING_PROFILE_DEFAULT);
/*
* Set the channel up initially in AUTO input translation mode to accept
@@ -4343,21 +4347,6 @@ Write(
}
/*
- * Transfer encoding nocomplain/strict option to the encoding flags
- */
-
- if (GotFlag(statePtr, CHANNEL_ENCODING_NOCOMPLAIN)) {
- statePtr->outputEncodingFlags |= TCL_ENCODING_NOCOMPLAIN;
- } else {
- statePtr->outputEncodingFlags &= ~TCL_ENCODING_NOCOMPLAIN;
- }
- if (GotFlag(statePtr, CHANNEL_ENCODING_STRICT)) {
- statePtr->outputEncodingFlags |= TCL_ENCODING_STRICT;
- } else {
- statePtr->outputEncodingFlags &= ~TCL_ENCODING_STRICT;
- }
-
- /*
* Write the terminated escape sequence even if srcLen is 0.
*/
@@ -4681,21 +4670,6 @@ Tcl_GetsObj(
}
/*
- * Transfer encoding nocomplain/strict option to the encoding flags
- */
-
- if (GotFlag(statePtr, CHANNEL_ENCODING_NOCOMPLAIN)) {
- statePtr->inputEncodingFlags |= TCL_ENCODING_NOCOMPLAIN;
- } else {
- statePtr->inputEncodingFlags &= ~TCL_ENCODING_NOCOMPLAIN;
- }
- if (GotFlag(statePtr, CHANNEL_ENCODING_STRICT)) {
- statePtr->inputEncodingFlags |= TCL_ENCODING_STRICT;
- } else {
- statePtr->inputEncodingFlags &= ~TCL_ENCODING_STRICT;
- }
-
- /*
* Object used by FilterInputBytes to keep track of how much data has been
* consumed from the channel buffers.
*/
@@ -5458,21 +5432,6 @@ FilterInputBytes(
}
gsPtr->state = statePtr->inputEncodingState;
- /*
- * Transfer encoding nocomplain/strict option to the encoding flags
- */
-
- if (GotFlag(statePtr, CHANNEL_ENCODING_NOCOMPLAIN)) {
- statePtr->inputEncodingFlags |= TCL_ENCODING_NOCOMPLAIN;
- } else {
- statePtr->inputEncodingFlags &= ~TCL_ENCODING_NOCOMPLAIN;
- }
- if (GotFlag(statePtr, CHANNEL_ENCODING_STRICT)) {
- statePtr->inputEncodingFlags |= TCL_ENCODING_STRICT;
- } else {
- statePtr->inputEncodingFlags &= ~TCL_ENCODING_STRICT;
- }
-
result = Tcl_ExternalToUtf(NULL, gsPtr->encoding, raw, rawLen,
statePtr->inputEncodingFlags | TCL_ENCODING_NO_TERMINATE,
&statePtr->inputEncodingState, dst, spaceLeft, &gsPtr->rawRead,
@@ -6259,21 +6218,6 @@ ReadChars(
}
/*
- * Transfer encoding nocomplain/strict option to the encoding flags
- */
-
- if (GotFlag(statePtr, CHANNEL_ENCODING_NOCOMPLAIN)) {
- statePtr->inputEncodingFlags |= TCL_ENCODING_NOCOMPLAIN;
- } else {
- statePtr->inputEncodingFlags &= ~TCL_ENCODING_NOCOMPLAIN;
- }
- if (GotFlag(statePtr, CHANNEL_ENCODING_STRICT)) {
- statePtr->inputEncodingFlags |= TCL_ENCODING_STRICT;
- } else {
- statePtr->inputEncodingFlags &= ~TCL_ENCODING_STRICT;
- }
-
- /*
* This routine is burdened with satisfying several constraints. It cannot
* append more than 'charsToRead` chars onto objPtr. This is measured
* after encoding and translation transformations are completed. There is
@@ -7810,7 +7754,7 @@ Tcl_BadChannelOption(
{
if (interp != NULL) {
const char *genericopt =
- "blocking buffering buffersize encoding eofchar nocomplainencoding strictencoding translation";
+ "blocking buffering buffersize encoding encodingprofile eofchar translation";
const char **argv;
size_t argc, i;
Tcl_DString ds;
@@ -7951,7 +7895,7 @@ Tcl_GetChannelOption(
return TCL_OK;
}
}
- if (len == 0 || HaveOpt(2, "-encoding")) {
+ if (len == 0 || HaveOpt(8, "-encoding")) {
if (len == 0) {
Tcl_DStringAppendElement(dsPtr, "-encoding");
}
@@ -7965,39 +7909,36 @@ Tcl_GetChannelOption(
return TCL_OK;
}
}
- if (len == 0 || HaveOpt(2, "-eofchar")) {
- char buf[4] = "";
+ if (len == 0 || HaveOpt(9, "-encodingprofile")) {
+ int profile;
+ const char *profileName;
if (len == 0) {
- Tcl_DStringAppendElement(dsPtr, "-eofchar");
+ Tcl_DStringAppendElement(dsPtr, "-encodingprofile");
}
- if ((flags & TCL_READABLE) && (statePtr->inEofChar != 0)) {
- sprintf(buf, "%c", statePtr->inEofChar);
+ /* Note currently input and output profiles are same */
+ profile = TCL_ENCODING_PROFILE_GET(statePtr->inputEncodingFlags);
+ profileName = TclEncodingProfileIdToName(interp, profile);
+ if (profileName == NULL) {
+ return TCL_ERROR;
}
+ Tcl_DStringAppendElement(dsPtr, profileName);
if (len > 0) {
- Tcl_DStringAppend(dsPtr, buf, TCL_INDEX_NONE);
return TCL_OK;
}
- Tcl_DStringAppendElement(dsPtr, buf);
}
- if (len == 0 || HaveOpt(1, "-nocomplainencoding")) {
+ if (len == 0 || HaveOpt(2, "-eofchar")) {
+ char buf[4] = "";
if (len == 0) {
- Tcl_DStringAppendElement(dsPtr, "-nocomplainencoding");
- }
- Tcl_DStringAppendElement(dsPtr,
- (flags & CHANNEL_ENCODING_NOCOMPLAIN) ? "1" : "0");
- if (len > 0) {
- return TCL_OK;
+ Tcl_DStringAppendElement(dsPtr, "-eofchar");
}
- }
- if (len == 0 || HaveOpt(1, "-strictencoding")) {
- if (len == 0) {
- Tcl_DStringAppendElement(dsPtr, "-strictencoding");
+ if ((flags & TCL_READABLE) && (statePtr->inEofChar != 0)) {
+ sprintf(buf, "%c", statePtr->inEofChar);
}
- Tcl_DStringAppendElement(dsPtr,
- (flags & CHANNEL_ENCODING_STRICT) ? "1" : "0");
if (len > 0) {
+ Tcl_DStringAppend(dsPtr, buf, TCL_INDEX_NONE);
return TCL_OK;
}
+ Tcl_DStringAppendElement(dsPtr, buf);
}
if (len == 0 || HaveOpt(1, "-translation")) {
if (len == 0) {
@@ -8180,6 +8121,7 @@ Tcl_SetChannelOption(
return TCL_OK;
} else if (HaveOpt(2, "-encoding")) {
Tcl_Encoding encoding;
+ int profile;
if ((newValue[0] == '\0') || (strcmp(newValue, "binary") == 0)) {
encoding = NULL;
@@ -8204,9 +8146,12 @@ Tcl_SetChannelOption(
Tcl_FreeEncoding(statePtr->encoding);
statePtr->encoding = encoding;
statePtr->inputEncodingState = NULL;
+ profile = TCL_ENCODING_PROFILE_GET(statePtr->inputEncodingFlags);
statePtr->inputEncodingFlags = TCL_ENCODING_START;
+ TCL_ENCODING_PROFILE_SET(statePtr->inputEncodingFlags, profile);
statePtr->outputEncodingState = NULL;
statePtr->outputEncodingFlags = TCL_ENCODING_START;
+ TCL_ENCODING_PROFILE_SET(statePtr->outputEncodingFlags, profile); /* Same as input */
ResetFlag(statePtr, CHANNEL_NEED_MORE_DATA|CHANNEL_ENCODING_ERROR);
UpdateInterest(chanPtr);
return TCL_OK;
@@ -8244,30 +8189,13 @@ Tcl_SetChannelOption(
ResetFlag(statePtr, CHANNEL_EOF|CHANNEL_STICKY_EOF|CHANNEL_BLOCKED);
statePtr->inputEncodingFlags &= ~TCL_ENCODING_END;
return TCL_OK;
- } else if (HaveOpt(1, "-nocomplainencoding")) {
- int newMode;
-
- if (Tcl_GetBoolean(interp, newValue, &newMode) == TCL_ERROR) {
- return TCL_ERROR;
- }
- if (newMode) {
- SetFlag(statePtr, CHANNEL_ENCODING_NOCOMPLAIN);
- } else {
- ResetFlag(statePtr, CHANNEL_ENCODING_NOCOMPLAIN);
- }
- ResetFlag(statePtr, CHANNEL_NEED_MORE_DATA|CHANNEL_ENCODING_ERROR);
- return TCL_OK;
- } else if (HaveOpt(1, "-strictencoding")) {
- int newMode;
-
- if (Tcl_GetBoolean(interp, newValue, &newMode) == TCL_ERROR) {
+ } else if (HaveOpt(1, "-encodingprofile")) {
+ int profile;
+ if (TclEncodingProfileNameToId(interp, newValue, &profile) != TCL_OK) {
return TCL_ERROR;
}
- if (newMode) {
- SetFlag(statePtr, CHANNEL_ENCODING_STRICT);
- } else {
- ResetFlag(statePtr, CHANNEL_ENCODING_STRICT);
- }
+ TCL_ENCODING_PROFILE_SET(statePtr->inputEncodingFlags, profile);
+ TCL_ENCODING_PROFILE_SET(statePtr->outputEncodingFlags, profile);
ResetFlag(statePtr, CHANNEL_NEED_MORE_DATA|CHANNEL_ENCODING_ERROR);
return TCL_OK;
} else if (HaveOpt(1, "-translation")) {
@@ -9344,12 +9272,17 @@ TclCopyChannel(
* of the bytes themselves.
*/
+ /*
+ * TODO - should really only allow lossless profiles. Below reflects
+ * Tcl 8.7 alphas prior to encoding profiles
+ */
+
moveBytes = inStatePtr->inEofChar == '\0' /* No eofChar to stop input */
&& inStatePtr->inputTranslation == TCL_TRANSLATE_LF
&& outStatePtr->outputTranslation == TCL_TRANSLATE_LF
&& inStatePtr->encoding == outStatePtr->encoding
- && (inStatePtr->flags & TCL_ENCODING_STRICT) != TCL_ENCODING_STRICT
- && outStatePtr->flags & TCL_ENCODING_NOCOMPLAIN;
+ && TCL_ENCODING_PROFILE_GET(inStatePtr->flags) != TCL_ENCODING_PROFILE_STRICT
+ && TCL_ENCODING_PROFILE_GET(outStatePtr->flags) == TCL_ENCODING_PROFILE_TCL8;
/*
* Allocate a new CopyState to maintain info about the current copy in
@@ -9677,8 +9610,8 @@ CopyData(
inBinary = (inStatePtr->encoding == NULL);
outBinary = (outStatePtr->encoding == NULL);
sameEncoding = inStatePtr->encoding == outStatePtr->encoding
- && (inStatePtr->flags & TCL_ENCODING_STRICT) != TCL_ENCODING_STRICT
- && outStatePtr->flags & TCL_ENCODING_NOCOMPLAIN;
+ && TCL_ENCODING_PROFILE_GET(inStatePtr->flags) != TCL_ENCODING_PROFILE_STRICT
+ && TCL_ENCODING_PROFILE_GET(outStatePtr->flags) == TCL_ENCODING_PROFILE_TCL8;
if (!(inBinary || sameEncoding)) {
TclNewObj(bufObj);
diff --git a/generic/tclIO.h b/generic/tclIO.h
index 62cf6e8..a050010 100644
--- a/generic/tclIO.h
+++ b/generic/tclIO.h
@@ -277,16 +277,11 @@ typedef struct ChannelState {
* encountered an encoding error */
#define CHANNEL_RAW_MODE (1<<16) /* When set, notes that the Raw API is
* being used. */
-#define CHANNEL_ENCODING_NOCOMPLAIN (1<<17) /* set if option
- * -nocomplainencoding is set to 1 */
-#define CHANNEL_ENCODING_STRICT (1<<18) /* set if option
- * -strictencoding is set to 1 */
#define CHANNEL_INCLOSE (1<<19) /* Channel is currently being closed.
* Its structures are still live and
* usable, but it may not be closed
* again from within the close
* handler. */
-#define ENCODING_FAILINDEX (1<<20) /* Internal flag, fail on Invalid bytes only */
#define CHANNEL_CLOSEDWRITE (1<<21) /* Channel write side has been closed.
* No further Tcl-level write IO on
* the channel is allowed. */
diff --git a/generic/tclInt.h b/generic/tclInt.h
index 827fd6f..9a9c0ae 100644
--- a/generic/tclInt.h
+++ b/generic/tclInt.h
@@ -2875,7 +2875,19 @@ MODULE_SCOPE int tclFindExecutableSearchDone;
MODULE_SCOPE char *tclMemDumpFileName;
MODULE_SCOPE TclPlatformType tclPlatform;
+/*
+ * Declarations related to internal encoding functions.
+ */
+
MODULE_SCOPE Tcl_Encoding tclIdentityEncoding;
+MODULE_SCOPE int
+TclEncodingProfileNameToId(Tcl_Interp *interp,
+ const char *profileName,
+ int *profilePtr);
+MODULE_SCOPE const char *TclEncodingProfileIdToName(Tcl_Interp *interp,
+ int profileId);
+MODULE_SCOPE int TclEncodingExternalFlagsToInternal(int flags);
+MODULE_SCOPE void TclGetEncodingProfiles(Tcl_Interp *interp);
/*
* TIP #233 (Virtualized Time)
@@ -4748,6 +4760,8 @@ MODULE_SCOPE Tcl_LibraryInitProc TclThread_Init;
MODULE_SCOPE Tcl_LibraryInitProc Procbodytest_Init;
MODULE_SCOPE Tcl_LibraryInitProc Procbodytest_SafeInit;
+
+
/*
*----------------------------------------------------------------
* Macro used by the Tcl core to check whether a pattern has any characters
diff --git a/generic/tclUtil.c b/generic/tclUtil.c
index e96a564..3abd615 100644
--- a/generic/tclUtil.c
+++ b/generic/tclUtil.c
@@ -4026,10 +4026,11 @@ TclGetProcessGlobalValue(
Tcl_MutexLock(&pgvPtr->mutex);
epoch = ++pgvPtr->epoch;
- Tcl_UtfToExternalDStringEx(pgvPtr->encoding, pgvPtr->value,
- pgvPtr->numBytes, TCL_ENCODING_NOCOMPLAIN, &native);
- Tcl_ExternalToUtfDStringEx(current, Tcl_DStringValue(&native),
- Tcl_DStringLength(&native), TCL_ENCODING_NOCOMPLAIN, &newValue);
+ Tcl_UtfToExternalDStringEx(NULL, pgvPtr->encoding, pgvPtr->value,
+ pgvPtr->numBytes, TCL_ENCODING_PROFILE_TCL8, &native, NULL);
+ Tcl_ExternalToUtfDStringEx(NULL, current, Tcl_DStringValue(&native),
+ Tcl_DStringLength(&native), TCL_ENCODING_PROFILE_TCL8,
+ &newValue, NULL);
Tcl_DStringFree(&native);
Tcl_Free(pgvPtr->value);
pgvPtr->value = (char *)Tcl_Alloc(Tcl_DStringLength(&newValue) + 1);
diff --git a/generic/tclZlib.c b/generic/tclZlib.c
index 5a6dbc4..dc7c3f3 100644
--- a/generic/tclZlib.c
+++ b/generic/tclZlib.c
@@ -547,8 +547,8 @@ ExtractHeader(
}
}
- Tcl_ExternalToUtfDStringEx(latin1enc, (char *) headerPtr->comment, -1,
- TCL_ENCODING_NOCOMPLAIN, &tmp);
+ Tcl_ExternalToUtfDStringEx(NULL, latin1enc, (char *) headerPtr->comment, -1,
+ TCL_ENCODING_PROFILE_TCL8, &tmp, NULL);
SetValue(dictObj, "comment", Tcl_DStringToObj(&tmp));
}
SetValue(dictObj, "crc", Tcl_NewBooleanObj(headerPtr->hcrc));
@@ -564,8 +564,8 @@ ExtractHeader(
}
}
- Tcl_ExternalToUtfDStringEx(latin1enc, (char *) headerPtr->name, -1,
- TCL_ENCODING_NOCOMPLAIN, &tmp);
+ Tcl_ExternalToUtfDStringEx(NULL, latin1enc, (char *) headerPtr->name, -1,
+ TCL_ENCODING_PROFILE_TCL8, &tmp, NULL);
SetValue(dictObj, "filename", Tcl_DStringToObj(&tmp));
}
if (headerPtr->os != 255) {
diff --git a/unix/tclUnixChan.c b/unix/tclUnixChan.c
index c41cdd9..b81676e 100644
--- a/unix/tclUnixChan.c
+++ b/unix/tclUnixChan.c
@@ -1028,11 +1028,11 @@ TtyGetOptionProc(
tcgetattr(fsPtr->fileState.fd, &iostate);
Tcl_DStringInit(&ds);
- Tcl_ExternalToUtfDStringEx(NULL, (char *) &iostate.c_cc[VSTART], 1, TCL_ENCODING_NOCOMPLAIN, &ds);
+ Tcl_ExternalToUtfDStringEx(NULL, NULL, (char *) &iostate.c_cc[VSTART], 1, TCL_ENCODING_PROFILE_TCL8, &ds, NULL);
Tcl_DStringAppendElement(dsPtr, Tcl_DStringValue(&ds));
TclDStringClear(&ds);
- Tcl_ExternalToUtfDStringEx(NULL, (char *) &iostate.c_cc[VSTOP], 1, TCL_ENCODING_NOCOMPLAIN, &ds);
+ Tcl_ExternalToUtfDStringEx(NULL, NULL, (char *) &iostate.c_cc[VSTOP], 1, TCL_ENCODING_PROFILE_TCL8, &ds, NULL);
Tcl_DStringAppendElement(dsPtr, Tcl_DStringValue(&ds));
Tcl_DStringFree(&ds);
}
diff --git a/unix/tclUnixFCmd.c b/unix/tclUnixFCmd.c
index b205061..7753cec 100644
--- a/unix/tclUnixFCmd.c
+++ b/unix/tclUnixFCmd.c
@@ -762,16 +762,16 @@ TclpObjCopyDirectory(
Tcl_Obj *transPtr;
transPtr = Tcl_FSGetTranslatedPath(NULL,srcPathPtr);
- Tcl_UtfToExternalDStringEx(NULL,
+ Tcl_UtfToExternalDStringEx(NULL, NULL,
(transPtr != NULL ? TclGetString(transPtr) : NULL),
- -1, TCL_ENCODING_NOCOMPLAIN, &srcString);
+ -1, TCL_ENCODING_PROFILE_TCL8, &srcString, NULL);
if (transPtr != NULL) {
Tcl_DecrRefCount(transPtr);
}
transPtr = Tcl_FSGetTranslatedPath(NULL,destPathPtr);
- Tcl_UtfToExternalDStringEx(NULL,
+ Tcl_UtfToExternalDStringEx(NULL, NULL,
(transPtr != NULL ? TclGetString(transPtr) : NULL),
- -1, TCL_ENCODING_NOCOMPLAIN, &dstString);
+ -1, TCL_ENCODING_PROFILE_TCL8, &dstString, NULL);
if (transPtr != NULL) {
Tcl_DecrRefCount(transPtr);
}
@@ -826,9 +826,9 @@ TclpObjRemoveDirectory(
int ret;
Tcl_Obj *transPtr = Tcl_FSGetTranslatedPath(NULL, pathPtr);
- Tcl_UtfToExternalDStringEx(NULL,
+ Tcl_UtfToExternalDStringEx(NULL, NULL,
(transPtr != NULL ? TclGetString(transPtr) : NULL),
- -1, TCL_ENCODING_NOCOMPLAIN, &pathString);
+ -1, TCL_ENCODING_PROFILE_TCL8, &pathString, NULL);
if (transPtr != NULL) {
Tcl_DecrRefCount(transPtr);
}
@@ -886,7 +886,7 @@ DoRemoveDirectory(
result = TCL_OK;
if ((errno != EEXIST) || (recursive == 0)) {
if (errorPtr != NULL) {
- Tcl_ExternalToUtfDStringEx(NULL, path, TCL_INDEX_NONE, TCL_ENCODING_NOCOMPLAIN, errorPtr);
+ Tcl_ExternalToUtfDStringEx(NULL, NULL, path, TCL_INDEX_NONE, TCL_ENCODING_PROFILE_TCL8, errorPtr, NULL);
}
result = TCL_ERROR;
}
@@ -1135,7 +1135,7 @@ TraverseUnixTree(
end:
if (errfile != NULL) {
if (errorPtr != NULL) {
- Tcl_ExternalToUtfDStringEx(NULL, errfile, TCL_INDEX_NONE, TCL_ENCODING_NOCOMPLAIN, errorPtr);
+ Tcl_ExternalToUtfDStringEx(NULL, NULL, errfile, TCL_INDEX_NONE, TCL_ENCODING_PROFILE_TCL8, errorPtr, NULL);
}
result = TCL_ERROR;
}
@@ -1205,8 +1205,8 @@ TraversalCopy(
*/
if (errorPtr != NULL) {
- Tcl_ExternalToUtfDStringEx(NULL, Tcl_DStringValue(dstPtr),
- Tcl_DStringLength(dstPtr), TCL_ENCODING_NOCOMPLAIN, errorPtr);
+ Tcl_ExternalToUtfDStringEx(NULL, NULL, Tcl_DStringValue(dstPtr),
+ Tcl_DStringLength(dstPtr), TCL_ENCODING_PROFILE_TCL8, errorPtr, NULL);
}
return TCL_ERROR;
}
@@ -1256,8 +1256,8 @@ TraversalDelete(
break;
}
if (errorPtr != NULL) {
- Tcl_ExternalToUtfDStringEx(NULL, Tcl_DStringValue(srcPtr),
- Tcl_DStringLength(srcPtr), TCL_ENCODING_NOCOMPLAIN, errorPtr);
+ Tcl_ExternalToUtfDStringEx(NULL, NULL, Tcl_DStringValue(srcPtr),
+ Tcl_DStringLength(srcPtr), TCL_ENCODING_PROFILE_TCL8, errorPtr, NULL);
}
return TCL_ERROR;
}
@@ -1424,7 +1424,7 @@ GetOwnerAttribute(
} else {
Tcl_DString ds;
- Tcl_ExternalToUtfDStringEx(NULL, pwPtr->pw_name, TCL_INDEX_NONE, TCL_ENCODING_NOCOMPLAIN, &ds);
+ Tcl_ExternalToUtfDStringEx(NULL, NULL, pwPtr->pw_name, TCL_INDEX_NONE, TCL_ENCODING_PROFILE_TCL8, &ds, NULL);
*attributePtrPtr = Tcl_DStringToObj(&ds);
}
return TCL_OK;
@@ -2086,7 +2086,7 @@ TclpObjNormalizePath(
*/
Tcl_DStringFree(&ds);
- Tcl_ExternalToUtfDStringEx(NULL, normPath, newNormLen, TCL_ENCODING_NOCOMPLAIN, &ds);
+ Tcl_ExternalToUtfDStringEx(NULL, NULL, normPath, newNormLen, TCL_ENCODING_PROFILE_TCL8, &ds, NULL);
if (path[nextCheckpoint] != '\0') {
/*
@@ -2179,7 +2179,7 @@ TclUnixOpenTemporaryFile(
if (dirObj) {
string = Tcl_GetStringFromObj(dirObj, &length);
- Tcl_UtfToExternalDStringEx(NULL, string, length, TCL_ENCODING_NOCOMPLAIN, &templ);
+ Tcl_UtfToExternalDStringEx(NULL, NULL, string, length, TCL_ENCODING_PROFILE_TCL8, &templ, NULL);
} else {
Tcl_DStringInit(&templ);
Tcl_DStringAppend(&templ, DefaultTempDir(), TCL_INDEX_NONE); /* INTL: native */
@@ -2189,7 +2189,7 @@ TclUnixOpenTemporaryFile(
if (basenameObj) {
string = Tcl_GetStringFromObj(basenameObj, &length);
- Tcl_UtfToExternalDStringEx(NULL, string, length, TCL_ENCODING_NOCOMPLAIN, &tmp);
+ Tcl_UtfToExternalDStringEx(NULL, NULL, string, length, TCL_ENCODING_PROFILE_TCL8, &tmp, NULL);
TclDStringAppendDString(&templ, &tmp);
Tcl_DStringFree(&tmp);
} else {
@@ -2201,7 +2201,7 @@ TclUnixOpenTemporaryFile(
#ifdef HAVE_MKSTEMPS
if (extensionObj) {
string = Tcl_GetStringFromObj(extensionObj, &length);
- Tcl_UtfToExternalDStringEx(NULL, string, length, TCL_ENCODING_NOCOMPLAIN, &tmp);
+ Tcl_UtfToExternalDStringEx(NULL, NULL, string, length, TCL_ENCODING_PROFILE_TCL8, &tmp, NULL);
TclDStringAppendDString(&templ, &tmp);
fd = mkstemps(Tcl_DStringValue(&templ), Tcl_DStringLength(&tmp));
Tcl_DStringFree(&tmp);
@@ -2217,8 +2217,8 @@ TclUnixOpenTemporaryFile(
}
if (resultingNameObj) {
- Tcl_ExternalToUtfDStringEx(NULL, Tcl_DStringValue(&templ),
- Tcl_DStringLength(&templ), TCL_ENCODING_NOCOMPLAIN, &tmp);
+ Tcl_ExternalToUtfDStringEx(NULL, NULL, Tcl_DStringValue(&templ),
+ Tcl_DStringLength(&templ), TCL_ENCODING_PROFILE_TCL8, &tmp, NULL);
Tcl_SetStringObj(resultingNameObj, Tcl_DStringValue(&tmp),
Tcl_DStringLength(&tmp));
Tcl_DStringFree(&tmp);
@@ -2304,7 +2304,7 @@ TclpCreateTemporaryDirectory(
if (dirObj) {
string = TclGetString(dirObj);
- Tcl_UtfToExternalDStringEx(NULL, string, dirObj->length, TCL_ENCODING_NOCOMPLAIN, &templ);
+ Tcl_UtfToExternalDStringEx(NULL, NULL, string, dirObj->length, TCL_ENCODING_PROFILE_TCL8, &templ, NULL);
} else {
Tcl_DStringInit(&templ);
Tcl_DStringAppend(&templ, DefaultTempDir(), TCL_INDEX_NONE); /* INTL: native */
@@ -2317,7 +2317,7 @@ TclpCreateTemporaryDirectory(
if (basenameObj) {
string = TclGetString(basenameObj);
if (basenameObj->length) {
- Tcl_UtfToExternalDStringEx(NULL, string, basenameObj->length, TCL_ENCODING_NOCOMPLAIN, &tmp);
+ Tcl_UtfToExternalDStringEx(NULL, NULL, string, basenameObj->length, TCL_ENCODING_PROFILE_TCL8, &tmp, NULL);
TclDStringAppendDString(&templ, &tmp);
Tcl_DStringFree(&tmp);
} else {
@@ -2342,8 +2342,8 @@ TclpCreateTemporaryDirectory(
* The template has been updated. Tell the caller what it was.
*/
- Tcl_ExternalToUtfDStringEx(NULL, Tcl_DStringValue(&templ),
- Tcl_DStringLength(&templ), TCL_ENCODING_NOCOMPLAIN, &tmp);
+ Tcl_ExternalToUtfDStringEx(NULL, NULL, Tcl_DStringValue(&templ),
+ Tcl_DStringLength(&templ), TCL_ENCODING_PROFILE_TCL8, &tmp, NULL);
Tcl_DStringFree(&templ);
return Tcl_DStringToObj(&tmp);
}
diff --git a/unix/tclUnixFile.c b/unix/tclUnixFile.c
index 830ed6f..fc297cb 100644
--- a/unix/tclUnixFile.c
+++ b/unix/tclUnixFile.c
@@ -153,7 +153,7 @@ TclpFindExecutable(
#endif
{
encoding = Tcl_GetEncoding(NULL, NULL);
- Tcl_ExternalToUtfDStringEx(encoding, name, TCL_INDEX_NONE, TCL_ENCODING_NOCOMPLAIN, &utfName);
+ Tcl_ExternalToUtfDStringEx(NULL, encoding, name, TCL_INDEX_NONE, TCL_ENCODING_PROFILE_TCL8, &utfName, NULL);
TclSetObjNameOfExecutable(
Tcl_NewStringObj(Tcl_DStringValue(&utfName), TCL_INDEX_NONE), encoding);
Tcl_DStringFree(&utfName);
@@ -179,8 +179,8 @@ TclpFindExecutable(
Tcl_DStringAppend(&nameString, name, TCL_INDEX_NONE);
Tcl_DStringFree(&buffer);
- Tcl_UtfToExternalDStringEx(NULL, Tcl_DStringValue(&cwd),
- Tcl_DStringLength(&cwd), TCL_ENCODING_NOCOMPLAIN, &buffer);
+ Tcl_UtfToExternalDStringEx(NULL, NULL, Tcl_DStringValue(&cwd),
+ Tcl_DStringLength(&cwd), TCL_ENCODING_PROFILE_TCL8, &buffer, NULL);
if (Tcl_DStringValue(&cwd)[Tcl_DStringLength(&cwd) -1] != '/') {
TclDStringAppendLiteral(&buffer, "/");
}
@@ -189,8 +189,8 @@ TclpFindExecutable(
Tcl_DStringFree(&nameString);
encoding = Tcl_GetEncoding(NULL, NULL);
- Tcl_ExternalToUtfDStringEx(encoding, Tcl_DStringValue(&buffer), TCL_INDEX_NONE,
- TCL_ENCODING_NOCOMPLAIN, &utfName);
+ Tcl_ExternalToUtfDStringEx(NULL, encoding, Tcl_DStringValue(&buffer), TCL_INDEX_NONE,
+ TCL_ENCODING_PROFILE_TCL8, &utfName, NULL);
TclSetObjNameOfExecutable(
Tcl_NewStringObj(Tcl_DStringValue(&utfName), TCL_INDEX_NONE), encoding);
Tcl_DStringFree(&utfName);
@@ -825,7 +825,7 @@ TclpReadlink(
return NULL;
}
- Tcl_ExternalToUtfDStringEx(NULL, link, length, TCL_ENCODING_NOCOMPLAIN, linkPtr);
+ Tcl_ExternalToUtfDStringEx(NULL, NULL, link, length, TCL_ENCODING_PROFILE_TCL8, linkPtr, NULL);
return Tcl_DStringValue(linkPtr);
#else
return NULL;
@@ -994,7 +994,7 @@ TclpObjLink(
return NULL;
}
- Tcl_ExternalToUtfDStringEx(NULL, link, length, TCL_ENCODING_NOCOMPLAIN, &ds);
+ Tcl_ExternalToUtfDStringEx(NULL, NULL, link, length, TCL_ENCODING_PROFILE_TCL8, &ds, NULL);
linkPtr = Tcl_DStringToObj(&ds);
Tcl_IncrRefCount(linkPtr);
return linkPtr;
@@ -1059,7 +1059,7 @@ TclpNativeToNormalized(
{
Tcl_DString ds;
- Tcl_ExternalToUtfDStringEx(NULL, (const char *) clientData, TCL_INDEX_NONE, TCL_ENCODING_NOCOMPLAIN, &ds);
+ Tcl_ExternalToUtfDStringEx(NULL, NULL, (const char *) clientData, TCL_INDEX_NONE, TCL_ENCODING_PROFILE_TCL8, &ds, NULL);
return Tcl_DStringToObj(&ds);
}
@@ -1113,7 +1113,7 @@ TclNativeCreateNativeRep(
}
str = Tcl_GetStringFromObj(validPathPtr, &len);
- Tcl_UtfToExternalDStringEx(NULL, str, len, TCL_ENCODING_NOCOMPLAIN, &ds);
+ Tcl_UtfToExternalDStringEx(NULL, NULL, str, len, TCL_ENCODING_PROFILE_TCL8, &ds, NULL);
len = Tcl_DStringLength(&ds) + sizeof(char);
if (strlen(Tcl_DStringValue(&ds)) < len - sizeof(char)) {
/* See bug [3118489]: NUL in filenames */
diff --git a/unix/tclUnixInit.c b/unix/tclUnixInit.c
index 8f7a737..71b059a 100644
--- a/unix/tclUnixInit.c
+++ b/unix/tclUnixInit.c
@@ -473,7 +473,7 @@ TclpInitLibraryPath(
*/
str = getenv("TCL_LIBRARY"); /* INTL: Native. */
- Tcl_ExternalToUtfDStringEx(NULL, str, TCL_INDEX_NONE, TCL_ENCODING_NOCOMPLAIN, &buffer);
+ Tcl_ExternalToUtfDStringEx(NULL, NULL, str, TCL_INDEX_NONE, TCL_ENCODING_PROFILE_TCL8, &buffer, NULL);
str = Tcl_DStringValue(&buffer);
if ((str != NULL) && (str[0] != '\0')) {
diff --git a/win/tclWinSock.c b/win/tclWinSock.c
index e5c7ee3..4eeeeec 100644
--- a/win/tclWinSock.c
+++ b/win/tclWinSock.c
@@ -373,8 +373,8 @@ InitializeHostName(
Tcl_DStringSetLength(&inDs, 256);
if (gethostname(Tcl_DStringValue(&inDs),
Tcl_DStringLength(&inDs)) == 0) {
- Tcl_ExternalToUtfDStringEx(NULL, Tcl_DStringValue(&inDs),
- TCL_INDEX_NONE, TCL_ENCODING_NOCOMPLAIN, &ds);
+ Tcl_ExternalToUtfDStringEx(NULL, NULL, Tcl_DStringValue(&inDs),
+ TCL_INDEX_NONE, TCL_ENCODING_PROFILE_TCL8, &ds, NULL);
}
Tcl_DStringFree(&inDs);
}