diff options
| author | griffin <briang42@easystreet.net> | 2023-03-25 00:29:59 (GMT) |
|---|---|---|
| committer | griffin <briang42@easystreet.net> | 2023-03-25 00:29:59 (GMT) |
| commit | 879e7b7112b429b14e6c6bb70873c4fe41d59e1c (patch) | |
| tree | 8afefec0015395ec4eceda2146ca978b63782cb3 /generic/tclCmdAH.c | |
| parent | ef7bbf24e8812d54b66b071036a2ff875ccb98d6 (diff) | |
| parent | b0a23df7d6a04013d6ee706f7c7a7f12b6d5b3ef (diff) | |
| download | tcl-879e7b7112b429b14e6c6bb70873c4fe41d59e1c.zip tcl-879e7b7112b429b14e6c6bb70873c4fe41d59e1c.tar.gz tcl-879e7b7112b429b14e6c6bb70873c4fe41d59e1c.tar.bz2 | |
Merge trunk
Diffstat (limited to 'generic/tclCmdAH.c')
| -rw-r--r-- | generic/tclCmdAH.c | 386 |
1 files changed, 233 insertions, 153 deletions
diff --git a/generic/tclCmdAH.c b/generic/tclCmdAH.c index 11f8ea2..8d0f43e 100644 --- a/generic/tclCmdAH.c +++ b/generic/tclCmdAH.c @@ -12,6 +12,8 @@ */ #include "tclInt.h" +#include "tclIO.h" +#include "tclTomMath.h" #ifdef _WIN32 # include "tclWinInt.h" #endif @@ -49,6 +51,7 @@ static Tcl_ObjCmdProc EncodingConvertfromObjCmd; static Tcl_ObjCmdProc EncodingConverttoObjCmd; static Tcl_ObjCmdProc EncodingDirsObjCmd; static Tcl_ObjCmdProc EncodingNamesObjCmd; +static Tcl_ObjCmdProc EncodingProfilesObjCmd; static Tcl_ObjCmdProc EncodingSystemObjCmd; static inline int ForeachAssignments(Tcl_Interp *interp, struct ForeachState *statePtr); @@ -384,6 +387,7 @@ TclInitEncodingCmd( {"convertto", EncodingConverttoObjCmd, TclCompileBasic1To3ArgCmd, NULL, NULL, 0}, {"dirs", EncodingDirsObjCmd, TclCompileBasic0Or1ArgCmd, NULL, NULL, 1}, {"names", EncodingNamesObjCmd, TclCompileBasic0ArgCmd, NULL, NULL, 0}, + {"profiles", EncodingProfilesObjCmd, TclCompileBasic0ArgCmd, NULL, NULL, 0}, {"system", EncodingSystemObjCmd, TclCompileBasic0Or1ArgCmd, NULL, NULL, 1}, {NULL, NULL, NULL, NULL, NULL, 0} }; @@ -392,6 +396,116 @@ TclInitEncodingCmd( } /* + *------------------------------------------------------------------------ + * + * EncodingConvertParseOptions -- + * + * Common routine for parsing arguments passed to encoding convertfrom + * and encoding convertto. + * + * Results: + * TCL_OK or TCL_ERROR. + * + * Side effects: + * On success, + * - *encPtr is set to the encoding. Must be freed with Tcl_FreeEncoding + * if non-NULL + * - *dataObjPtr is set to the Tcl_Obj containing the data to encode or + * decode + * - *profilePtr is set to encoding error handling profile + * - *failVarPtr is set to -failindex option value or NULL + * On error, all of the above are uninitialized. + * + *------------------------------------------------------------------------ + */ +static int +EncodingConvertParseOptions ( + Tcl_Interp *interp, /* For error messages. May be NULL */ + int objc, /* Number of arguments */ + Tcl_Obj *const objv[], /* Argument objects as passed to command. */ + Tcl_Encoding *encPtr, /* Where to store the encoding */ + Tcl_Obj **dataObjPtr, /* Where to store ptr to Tcl_Obj containing data */ + int *profilePtr, /* Bit mask of encoding option profile */ + Tcl_Obj **failVarPtr /* Where to store -failindex option value */ +) +{ + static const char *const options[] = {"-profile", "-failindex", NULL}; + enum convertfromOptions { PROFILE, FAILINDEX } optIndex; + Tcl_Encoding encoding; + Tcl_Obj *dataObj; + Tcl_Obj *failVarObj; +#if TCL_MAJOR_VERSION > 8 || defined(TCL_NO_DEPRECATED) + int profile = TCL_ENCODING_PROFILE_TCL8; /* TODO - default for Tcl9? */ +#else + int profile = TCL_ENCODING_PROFILE_TCL8; +#endif + + /* + * Possible combinations: + * 1) data -> objc = 2 + * 2) ?options? encoding data -> objc >= 3 + * It is intentional that specifying option forces encoding to be + * specified. Less prone to user error. This should have always been + * the case even in 8.6 imho where there were no options (ie (1) + * should never have been allowed) + */ + + if (objc == 1) { +numArgsError: /* ONLY jump here if nothing needs to be freed!!! */ + Tcl_WrongNumArgs(interp, + 1, + objv, + "?-profile profile? ?-failindex var? encoding data"); + ((Interp *)interp)->flags |= INTERP_ALTERNATE_WRONG_ARGS; + Tcl_WrongNumArgs(interp, 1, objv, "data"); + return TCL_ERROR; + } + + failVarObj = NULL; + if (objc == 2) { + encoding = Tcl_GetEncoding(interp, NULL); + dataObj = objv[1]; + } else { + int argIndex; + for (argIndex = 1; argIndex < (objc-2); ++argIndex) { + if (Tcl_GetIndexFromObj( + interp, objv[argIndex], options, "option", 0, &optIndex) + != TCL_OK) { + return TCL_ERROR; + } + if (++argIndex == (objc - 2)) { + goto numArgsError; + } + switch (optIndex) { + case PROFILE: + if (TclEncodingProfileNameToId(interp, + Tcl_GetString(objv[argIndex]), + &profile) != TCL_OK) { + return TCL_ERROR; + } + break; + case FAILINDEX: + failVarObj = objv[argIndex]; + break; + } + } + /* Get encoding after opts so no need to free it on option error */ + if (Tcl_GetEncodingFromObj(interp, objv[objc - 2], &encoding) + != TCL_OK) { + return TCL_ERROR; + } + dataObj = objv[objc - 1]; + } + + *encPtr = encoding; + *dataObjPtr = dataObj; + *profilePtr = profile; + *failVarPtr = failVarObj; + + return TCL_OK; +} + +/* *---------------------------------------------------------------------- * * EncodingConvertfromObjCmd -- @@ -417,94 +531,66 @@ EncodingConvertfromObjCmd( Tcl_Encoding encoding; /* Encoding to use */ size_t length = 0; /* Length of the byte array being converted */ const char *bytesPtr; /* Pointer to the first byte of the array */ - int flags = 0; - size_t result; - Tcl_Obj *failVarObj = NULL; - /* - * Decode parameters: - * Possible combinations: - * 1) data -> objc = 2 - * 2) encoding data -> objc = 3 - * 3) -nocomplain data -> objc = 3 - * 4) -nocomplain encoding data -> objc = 4 - * 5) -strict data -> objc = 3 - * 6) -strict encoding data -> objc = 4 - * 7) -failindex val data -> objc = 4 - * 8) -failindex val encoding data -> objc = 5 - */ + int flags; + int result; + Tcl_Obj *failVarObj; + Tcl_Size errorLocation; - if (objc == 2) { - encoding = Tcl_GetEncoding(interp, NULL); - data = objv[1]; - } else if (objc > 2 && objc < 6) { - int objcUnprocessed = objc; - data = objv[objc - 1]; - bytesPtr = Tcl_GetString(objv[1]); - if (bytesPtr[0] == '-' && bytesPtr[1] == 'n' - && !strncmp(bytesPtr, "-nocomplain", strlen(bytesPtr))) { - flags = TCL_ENCODING_NOCOMPLAIN; - objcUnprocessed--; - } else if (bytesPtr[0] == '-' && bytesPtr[1] == 's' - && !strncmp(bytesPtr, "-strict", strlen(bytesPtr))) { - flags = TCL_ENCODING_STRICT; - objcUnprocessed--; - } else if (bytesPtr[0] == '-' && bytesPtr[1] == 'f' - && !strncmp(bytesPtr, "-failindex", strlen(bytesPtr))) { - /* at least two additional arguments needed */ - if (objc < 4) { - goto encConvFromError; - } - failVarObj = objv[2]; - objcUnprocessed -= 2; - } - switch (objcUnprocessed) { - case 3: - if (Tcl_GetEncodingFromObj(interp, objv[objc - 2], &encoding) != TCL_OK) { - return TCL_ERROR; - } - break; - case 2: - encoding = Tcl_GetEncoding(interp, NULL); - break; - default: - goto encConvFromError; - } - } else { - encConvFromError: - Tcl_WrongNumArgs(interp, 1, objv, "?-nocomplain? ?-strict? ?-failindex var? ?encoding? data"); + if (EncodingConvertParseOptions( + interp, objc, objv, &encoding, &data, &flags, &failVarObj) + != TCL_OK) { return TCL_ERROR; } /* - * Convert the string into a byte array in 'ds' + * Convert the string into a byte array in 'ds'. */ bytesPtr = (char *) Tcl_GetBytesFromObj(interp, data, &length); + if (bytesPtr == NULL) { return TCL_ERROR; } - result = Tcl_ExternalToUtfDStringEx(encoding, bytesPtr, length, - flags, &ds); - if ((!(flags & TCL_ENCODING_NOCOMPLAIN) || ((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT)) && (result != TCL_INDEX_NONE)) { - if (failVarObj != NULL) { - if (Tcl_ObjSetVar2(interp, failVarObj, NULL, Tcl_NewWideIntObj(result), TCL_LEAVE_ERR_MSG) == NULL) { - return TCL_ERROR; - } - } else { - char buf[TCL_INTEGER_SPACE]; - sprintf(buf, "%" TCL_Z_MODIFIER "u", result); - Tcl_SetObjResult(interp, Tcl_ObjPrintf("unexpected byte sequence starting at index %" - TCL_Z_MODIFIER "u: '\\x%X'", result, UCHAR(bytesPtr[result]))); - Tcl_SetErrorCode(interp, "TCL", "ENCODING", "ILLEGALSEQUENCE", - buf, NULL); - Tcl_DStringFree(&ds); + result = Tcl_ExternalToUtfDStringEx(interp, encoding, bytesPtr, length, flags, + &ds, failVarObj ? &errorLocation : NULL); + /* NOTE: ds must be freed beyond this point even on error */ + switch (result) { + case TCL_OK: + errorLocation = TCL_INDEX_NONE; + break; + case TCL_ERROR: + /* Error in parameters. Should not happen. interp will have error */ + Tcl_DStringFree(&ds); + return TCL_ERROR; + default: + /* + * One of the TCL_CONVERT_* errors. If we were not interested in the + * error location, interp result would already have been filled in + * and we can just return the error. Otherwise, we have to return + * what could be decoded and the returned error location. + */ + if (failVarObj == NULL) { + Tcl_DStringFree(&ds); return TCL_ERROR; } - } else if (failVarObj != NULL) { - if (Tcl_ObjSetVar2(interp, failVarObj, NULL, Tcl_NewIntObj(-1), TCL_LEAVE_ERR_MSG) == NULL) { + break; + } + + /* + * TCL_OK or a TCL_CONVERT_* error where the caller wants back as much + * data as was converted. + */ + if (failVarObj) { + Tcl_Obj *failIndex; + TclNewIndexObj(failIndex, errorLocation); + if (Tcl_ObjSetVar2(interp, + failVarObj, + NULL, + failIndex, + TCL_LEAVE_ERR_MSG) == NULL) { + Tcl_DStringFree(&ds); return TCL_ERROR; } } - /* * Note that we cannot use Tcl_DStringResult here because it will * truncate the string at the first null byte. @@ -512,9 +598,7 @@ EncodingConvertfromObjCmd( Tcl_SetObjResult(interp, Tcl_DStringToObj(&ds)); - /* - * We're done with the encoding - */ + /* We're done with the encoding */ Tcl_FreeEncoding(encoding); return TCL_OK; @@ -547,60 +631,14 @@ EncodingConverttoObjCmd( Tcl_Encoding encoding; /* Encoding to use */ size_t length; /* Length of the string being converted */ const char *stringPtr; /* Pointer to the first byte of the string */ - size_t result; - int flags = 0; - Tcl_Obj *failVarObj = NULL; - - /* - * Decode parameters: - * Possible combinations: - * 1) data -> objc = 2 - * 2) encoding data -> objc = 3 - * 3) -nocomplain data -> objc = 3 - * 4) -nocomplain encoding data -> objc = 4 - * 5) -failindex val data -> objc = 4 - * 6) -failindex val encoding data -> objc = 5 - */ + int result; + int flags; + Tcl_Obj *failVarObj; + Tcl_Size errorLocation; - if (objc == 2) { - encoding = Tcl_GetEncoding(interp, NULL); - data = objv[1]; - } else if (objc > 2 && objc < 6) { - int objcUnprocessed = objc; - data = objv[objc - 1]; - stringPtr = Tcl_GetString(objv[1]); - if (stringPtr[0] == '-' && stringPtr[1] == 'n' - && !strncmp(stringPtr, "-nocomplain", strlen(stringPtr))) { - flags = TCL_ENCODING_NOCOMPLAIN; - objcUnprocessed--; - } else if (stringPtr[0] == '-' && stringPtr[1] == 's' - && !strncmp(stringPtr, "-strict", strlen(stringPtr))) { - flags = TCL_ENCODING_STRICT; - objcUnprocessed--; - } else if (stringPtr[0] == '-' && stringPtr[1] == 'f' - && !strncmp(stringPtr, "-failindex", strlen(stringPtr))) { - /* at least two additional arguments needed */ - if (objc < 4) { - goto encConvToError; - } - failVarObj = objv[2]; - objcUnprocessed -= 2; - } - switch (objcUnprocessed) { - case 3: - if (Tcl_GetEncodingFromObj(interp, objv[objc - 2], &encoding) != TCL_OK) { - return TCL_ERROR; - } - break; - case 2: - encoding = Tcl_GetEncoding(interp, NULL); - break; - default: - goto encConvToError; - } - } else { - encConvToError: - Tcl_WrongNumArgs(interp, 1, objv, "?-nocomplain? ?-strict? ?-failindex var? ?encoding? data"); + if (EncodingConvertParseOptions( + interp, objc, objv, &encoding, &data, &flags, &failVarObj) + != TCL_OK) { return TCL_ERROR; } @@ -609,40 +647,54 @@ EncodingConverttoObjCmd( */ stringPtr = Tcl_GetStringFromObj(data, &length); - result = Tcl_UtfToExternalDStringEx(encoding, stringPtr, length, - flags, &ds); - if ((!(flags & TCL_ENCODING_NOCOMPLAIN) || ((flags & TCL_ENCODING_STRICT) == TCL_ENCODING_STRICT)) && (result != TCL_INDEX_NONE)) { - if (failVarObj != NULL) { - /* I hope, wide int will cover size_t data type */ - if (Tcl_ObjSetVar2(interp, failVarObj, NULL, Tcl_NewWideIntObj(result), TCL_LEAVE_ERR_MSG) == NULL) { - return TCL_ERROR; - } - } else { - size_t pos = Tcl_NumUtfChars(stringPtr, result); - int ucs4; - char buf[TCL_INTEGER_SPACE]; - TclUtfToUCS4(&stringPtr[result], &ucs4); - sprintf(buf, "%" TCL_Z_MODIFIER "u", result); - Tcl_SetObjResult(interp, Tcl_ObjPrintf("unexpected character at index %" - TCL_Z_MODIFIER "u: 'U+%06X'", pos, ucs4)); - Tcl_SetErrorCode(interp, "TCL", "ENCODING", "ILLEGALSEQUENCE", - buf, NULL); - Tcl_DStringFree(&ds); + result = Tcl_UtfToExternalDStringEx(interp, encoding, stringPtr, length, flags, + &ds, failVarObj ? &errorLocation : NULL); + /* NOTE: ds must be freed beyond this point even on error */ + + switch (result) { + case TCL_OK: + errorLocation = TCL_INDEX_NONE; + break; + case TCL_ERROR: + /* Error in parameters. Should not happen. interp will have error */ + Tcl_DStringFree(&ds); + return TCL_ERROR; + default: + /* + * One of the TCL_CONVERT_* errors. If we were not interested in the + * error location, interp result would already have been filled in + * and we can just return the error. Otherwise, we have to return + * what could be decoded and the returned error location. + */ + if (failVarObj == NULL) { + Tcl_DStringFree(&ds); return TCL_ERROR; } - } else if (failVarObj != NULL) { - if (Tcl_ObjSetVar2(interp, failVarObj, NULL, Tcl_NewIntObj(-1), TCL_LEAVE_ERR_MSG) == NULL) { + break; + } + /* + * TCL_OK or a TCL_CONVERT_* error where the caller wants back as much + * data as was converted. + */ + if (failVarObj) { + Tcl_Obj *failIndex; + TclNewIndexObj(failIndex, errorLocation); + if (Tcl_ObjSetVar2(interp, + failVarObj, + NULL, + failIndex, + TCL_LEAVE_ERR_MSG) == NULL) { + Tcl_DStringFree(&ds); return TCL_ERROR; } } + Tcl_SetObjResult(interp, Tcl_NewByteArrayObj((unsigned char*) Tcl_DStringValue(&ds), Tcl_DStringLength(&ds))); Tcl_DStringFree(&ds); - /* - * We're done with the encoding - */ + /* We're done with the encoding */ Tcl_FreeEncoding(encoding); return TCL_OK; @@ -727,6 +779,34 @@ EncodingNamesObjCmd( /* *----------------------------------------------------------------------------- * + * EncodingProfilesObjCmd -- + * + * This command returns a list of the available encoding profiles + * + * Results: + * Returns a standard Tcl result + * + *----------------------------------------------------------------------------- + */ + +int +EncodingProfilesObjCmd( + TCL_UNUSED(void *), + Tcl_Interp* interp, /* Tcl interpreter */ + int objc, /* Number of command line args */ + Tcl_Obj* const objv[]) /* Vector of command line args */ +{ + if (objc > 1) { + Tcl_WrongNumArgs(interp, 1, objv, NULL); + return TCL_ERROR; + } + TclGetEncodingProfiles(interp); + return TCL_OK; +} + +/* + *----------------------------------------------------------------------------- + * * EncodingSystemObjCmd -- * * This command retrieves or changes the system encoding |
