summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorjan.nijtmans <nijtmans@users.sourceforge.net>2024-06-27 13:45:10 (GMT)
committerjan.nijtmans <nijtmans@users.sourceforge.net>2024-06-27 13:45:10 (GMT)
commit7bd797112eda2ff80a3e6be08a841633e8d57443 (patch)
tree831ae79c5f76045c58d4ed378cd7bb48e759b9df
parentf046e62c9bf3a05b3df943ba3a6b4af5da8a4fd0 (diff)
downloadtcl-7bd797112eda2ff80a3e6be08a841633e8d57443.zip
tcl-7bd797112eda2ff80a3e6be08a841633e8d57443.tar.gz
tcl-7bd797112eda2ff80a3e6be08a841633e8d57443.tar.bz2
(backport) Add tcl::unsupported::icu command (why not!)
-rw-r--r--generic/tclBasic.c22
-rw-r--r--generic/tclIcu.c701
-rw-r--r--generic/tclInt.h1
-rw-r--r--library/icu.tcl145
-rw-r--r--library/tclIndex1
-rw-r--r--tests/icu.test51
-rw-r--r--unix/Makefile.in7
-rw-r--r--win/Makefile.in1
-rw-r--r--win/makefile.vc1
9 files changed, 921 insertions, 9 deletions
diff --git a/generic/tclBasic.c b/generic/tclBasic.c
index 507d987..9c35889 100644
--- a/generic/tclBasic.c
+++ b/generic/tclBasic.c
@@ -352,7 +352,7 @@ static const CmdInfo builtInCmds[] = {
{"lmap", Tcl_LmapObjCmd, TclCompileLmapCmd, TclNRLmapCmd, CMD_IS_SAFE},
{"lpop", Tcl_LpopObjCmd, NULL, NULL, CMD_IS_SAFE},
{"lrange", Tcl_LrangeObjCmd, TclCompileLrangeCmd, NULL, CMD_IS_SAFE},
- {"lremove", Tcl_LremoveObjCmd, NULL, NULL, CMD_IS_SAFE},
+ {"lremove", Tcl_LremoveObjCmd, NULL, NULL, CMD_IS_SAFE},
{"lrepeat", Tcl_LrepeatObjCmd, NULL, NULL, CMD_IS_SAFE},
{"lreplace", Tcl_LreplaceObjCmd, TclCompileLreplaceCmd, NULL, CMD_IS_SAFE},
{"lreverse", Tcl_LreverseObjCmd, NULL, NULL, CMD_IS_SAFE},
@@ -758,7 +758,7 @@ buildInfoObjCmd(
p += len;
q = strchr(++p, '.');
if (!q) {
- q = p + strlen(p);
+ q = p + strlen(p);
}
memcpy(buf, p, q - p);
buf[q - p] = '\0';
@@ -1213,6 +1213,10 @@ Tcl_CreateInterp(void)
Tcl_CreateObjCommand(interp, "::tcl::unsupported::corotype",
CoroTypeObjCmd, NULL, NULL);
+ /* Load and intialize ICU */
+ Tcl_CreateObjCommand(interp, "::tcl::unsupported::loadIcu",
+ TclLoadIcuObjCmd, NULL, NULL);
+
/* Export unsupported commands */
nsPtr = Tcl_FindNamespace(interp, "::tcl::unsupported", NULL, 0);
if (nsPtr) {
@@ -1488,9 +1492,11 @@ TclHideUnsafeCommands(
Tcl_Obj *hideName = Tcl_ObjPrintf("tcl:%s:%s",
unsafePtr->ensembleNsName, unsafePtr->commandName);
+#define INTERIM_HACK_NAME "___tmp"
+
if (TclRenameCommand(interp, TclGetString(cmdName),
- "___tmp") != TCL_OK
- || Tcl_HideCommand(interp, "___tmp",
+ INTERIM_HACK_NAME) != TCL_OK
+ || Tcl_HideCommand(interp, INTERIM_HACK_NAME,
TclGetString(hideName)) != TCL_OK) {
Tcl_Panic("problem making '%s %s' safe: %s",
unsafePtr->ensembleNsName, unsafePtr->commandName,
@@ -2763,7 +2769,7 @@ Tcl_CreateObjCommand(
/* If not NULL, gives a function to call when
* this command is deleted. */
{
- Interp *iPtr = (Interp *) interp;
+ Interp *iPtr = (Interp *)interp;
Namespace *nsPtr;
const char *tail;
@@ -2772,7 +2778,7 @@ Tcl_CreateObjCommand(
* The interpreter is being deleted. Don't create any new commands;
* it's not safe to muck with the interpreter anymore.
*/
- return (Tcl_Command) NULL;
+ return NULL;
}
/*
@@ -3045,7 +3051,7 @@ TclInvokeObjectCommand(
int argc, /* Number of arguments. */
const char **argv) /* Argument strings. */
{
- Command *cmdPtr = ( Command *) clientData;
+ Command *cmdPtr = ( Command *)clientData;
Tcl_Obj *objPtr;
int i, length, result;
Tcl_Obj **objv = (Tcl_Obj **)
@@ -9433,7 +9439,7 @@ TclNRTailcallObjCmd(
nsObjPtr = Tcl_NewStringObj(nsPtr->fullName, TCL_INDEX_NONE);
listPtr = Tcl_NewListObj(objc, objv);
- TclListObjSetElement(interp, listPtr, 0, nsObjPtr);
+ TclListObjSetElement(interp, listPtr, 0, nsObjPtr);
iPtr->varFramePtr->tailcallPtr = listPtr;
}
diff --git a/generic/tclIcu.c b/generic/tclIcu.c
new file mode 100644
index 0000000..b6355ee
--- /dev/null
+++ b/generic/tclIcu.c
@@ -0,0 +1,701 @@
+/*
+ * tclIcu.c --
+ *
+ * tclIcu.c implements various Tcl commands that make use of
+ * the ICU library if present on the system.
+ * (Adapted from tkIcu.c)
+ *
+ * Copyright © 2021 Jan Nijtmans
+ * Copyright © 2024 Ashok P. Nadkarni
+ *
+ * See the file "license.terms" for information on usage and redistribution of
+ * this file, and for a DISCLAIMER OF ALL WARRANTIES.
+ */
+
+#include "tclInt.h"
+
+/*
+ * Runtime linking of libicu.
+ */
+typedef enum UBreakIteratorTypex {
+ UBRK_CHARACTERX = 0,
+ UBRK_WORDX = 1
+} UBreakIteratorTypex;
+
+typedef enum UErrorCodex {
+ U_AMBIGUOUS_ALIAS_WARNING = -122,
+ U_ZERO_ERRORZ = 0, /**< No error, no warning. */
+} UErrorCodex;
+
+#define U_SUCCESS(x) ((x)<=U_ZERO_ERRORZ)
+#define U_FAILURE(x) ((x)>U_ZERO_ERRORZ)
+
+struct UEnumeration;
+typedef struct UEnumeration UEnumeration;
+struct UCharsetDetector;
+typedef struct UCharsetDetector UCharsetDetector;
+struct UCharsetMatch;
+typedef struct UCharsetMatch UCharsetMatch;
+
+/*
+ * Prototypes for ICU functions sorted by category.
+ */
+typedef void (*fn_u_cleanup)(void);
+typedef const char *(*fn_u_errorName)(UErrorCodex);
+
+typedef uint16_t (*fn_ucnv_countAliases)(const char *, UErrorCodex *);
+typedef int32_t (*fn_ucnv_countAvailable)();
+typedef const char *(*fn_ucnv_getAlias)(const char *, uint16_t, UErrorCodex *);
+typedef const char *(*fn_ucnv_getAvailableName)(int32_t);
+
+typedef void *(*fn_ubrk_open)(UBreakIteratorTypex, const char *,
+ const uint16_t *, int32_t, UErrorCodex *);
+typedef void (*fn_ubrk_close)(void *);
+typedef int32_t (*fn_ubrk_preceding)(void *, int32_t);
+typedef int32_t (*fn_ubrk_following)(void *, int32_t);
+typedef int32_t (*fn_ubrk_previous)(void *);
+typedef int32_t (*fn_ubrk_next)(void *);
+typedef void (*fn_ubrk_setText)(void *, const void *, int32_t, UErrorCodex *);
+
+typedef UCharsetDetector * (*fn_ucsdet_open)(UErrorCodex *status);
+typedef void (*fn_ucsdet_close)(UCharsetDetector *ucsd);
+typedef void (*fn_ucsdet_setText)(UCharsetDetector *ucsd, const char *textIn, int32_t len, UErrorCodex *status);
+typedef const char * (*fn_ucsdet_getName)(const UCharsetMatch *ucsm, UErrorCodex *status);
+typedef UEnumeration * (*fn_ucsdet_getAllDetectableCharsets)(UCharsetDetector *ucsd, UErrorCodex *status);
+typedef const UCharsetMatch * (*fn_ucsdet_detect)(UCharsetDetector *ucsd, UErrorCodex *status);
+typedef const UCharsetMatch ** (*fn_ucsdet_detectAll)(UCharsetDetector *ucsd, int32_t *matchesFound, UErrorCodex *status);
+
+typedef void (*fn_uenum_close)(UEnumeration *);
+typedef int32_t (*fn_uenum_count)(UEnumeration *, UErrorCodex *);
+typedef const char *(*fn_uenum_next)(UEnumeration *, int32_t *, UErrorCodex *);
+
+#define FIELD(name) fn_ ## name _ ## name
+static struct {
+ size_t nopen; /* Total number of references to ALL libraries */
+ /*
+ * Depending on platform, ICU symbols may be distributed amongst
+ * multiple libraries. For current functionality at most 2 needed.
+ * Order of library loading is not guaranteed.
+ */
+ Tcl_LoadHandle libs[2];
+
+ FIELD(u_cleanup);
+ FIELD(u_errorName);
+
+ FIELD(ubrk_open);
+ FIELD(ubrk_close);
+ FIELD(ubrk_preceding);
+ FIELD(ubrk_following);
+ FIELD(ubrk_previous);
+ FIELD(ubrk_next);
+ FIELD(ubrk_setText);
+
+ FIELD(ucnv_countAliases);
+ FIELD(ucnv_countAvailable);
+ FIELD(ucnv_getAlias);
+ FIELD(ucnv_getAvailableName);
+
+ FIELD(ucsdet_close);
+ FIELD(ucsdet_detect);
+ FIELD(ucsdet_detectAll);
+ FIELD(ucsdet_getAllDetectableCharsets);
+ FIELD(ucsdet_getName);
+ FIELD(ucsdet_open);
+ FIELD(ucsdet_setText);
+
+ FIELD(uenum_close);
+ FIELD(uenum_count);
+ FIELD(uenum_next);
+
+} icu_fns = {
+ 0, {NULL, NULL}, /* Reference count, library handles */
+ NULL, NULL, /* u_* */
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, /* ubrk* */
+ NULL, NULL, NULL, NULL, /* ucnv_* */
+ NULL, NULL, NULL, NULL, NULL, NULL, NULL, /* ucsdet* */
+ NULL, NULL, NULL, /* uenum_* */
+};
+
+#define u_cleanup icu_fns._u_cleanup
+#define u_errorName icu_fns._u_errorName
+
+#define ubrk_open icu_fns._ubrk_open
+#define ubrk_close icu_fns._ubrk_close
+#define ubrk_preceding icu_fns._ubrk_preceding
+#define ubrk_following icu_fns._ubrk_following
+#define ubrk_previous icu_fns._ubrk_previous
+#define ubrk_next icu_fns._ubrk_next
+#define ubrk_setText icu_fns._ubrk_setText
+
+#define ucnv_countAliases icu_fns._ucnv_countAliases
+#define ucnv_countAvailable icu_fns._ucnv_countAvailable
+#define ucnv_getAlias icu_fns._ucnv_getAlias
+#define ucnv_getAvailableName icu_fns._ucnv_getAvailableName
+
+#define ucsdet_close icu_fns._ucsdet_close
+#define ucsdet_detect icu_fns._ucsdet_detect
+#define ucsdet_detectAll icu_fns._ucsdet_detectAll
+#define ucsdet_getAllDetectableCharsets icu_fns._ucsdet_getAllDetectableCharsets
+#define ucsdet_getName icu_fns._ucsdet_getName
+#define ucsdet_open icu_fns._ucsdet_open
+#define ucsdet_setText icu_fns._ucsdet_setText
+
+#define uenum_next icu_fns._uenum_next
+#define uenum_close icu_fns._uenum_close
+#define uenum_count icu_fns._uenum_count
+
+
+TCL_DECLARE_MUTEX(icu_mutex);
+
+static int FunctionNotAvailableError(Tcl_Interp *interp) {
+ if (interp) {
+ Tcl_SetObjResult(interp,
+ Tcl_NewStringObj("ICU function not available", TCL_INDEX_NONE));
+ }
+ return TCL_ERROR;
+}
+
+static int IcuError(Tcl_Interp *interp, const char *message, UErrorCodex code)
+{
+ if (interp) {
+ const char *codeMessage = NULL;
+ if (u_errorName) {
+ codeMessage = u_errorName(code);
+ }
+ Tcl_SetObjResult(interp,
+ Tcl_ObjPrintf("%s. ICU error (%d): %s",
+ message,
+ code,
+ codeMessage ? codeMessage : ""));
+ }
+ return TCL_ERROR;
+}
+
+static int DetectEncoding(Tcl_Interp *interp, Tcl_Obj *objPtr, int all)
+{
+ Tcl_Size len;
+ const char *bytes;
+ const UCharsetMatch *match;
+ const UCharsetMatch **matches;
+ int nmatches;
+ int ret;
+
+ if (ucsdet_open == NULL || ucsdet_setText == NULL ||
+ ucsdet_detect == NULL || ucsdet_detectAll == NULL ||
+ ucsdet_getName == NULL || ucsdet_close == NULL) {
+ return FunctionNotAvailableError(interp);
+ }
+
+ bytes = (char *) Tcl_GetBytesFromObj(interp, objPtr, &len);
+ if (bytes == NULL) {
+ return TCL_ERROR;
+ }
+ UErrorCodex status = U_ZERO_ERRORZ;
+
+ UCharsetDetector* csd = ucsdet_open(&status);
+ if (U_FAILURE(status)) {
+ return IcuError(interp, "Could not open charset detector.", status);
+ }
+
+ ucsdet_setText(csd, bytes, len, &status);
+ if (U_FAILURE(status)) {
+ IcuError(interp, "Could not set detection text.", status);
+ ucsdet_close(csd);
+ return TCL_ERROR;
+ }
+
+ if (all) {
+ matches = ucsdet_detectAll(csd, &nmatches, &status);
+ }
+ else {
+ match = ucsdet_detect(csd, &status);
+ matches = &match;
+ nmatches = match ? 1 : 0;
+ }
+
+ if (U_FAILURE(status) || nmatches == 0) {
+ ret = IcuError(interp, "Could not detect character set.", status);
+ }
+ else {
+ int i;
+ Tcl_Obj *resultObj = Tcl_NewListObj(nmatches, NULL);
+ for (i = 0; i < nmatches; ++i) {
+ const char *name = ucsdet_getName(matches[i], &status);
+ if (U_FAILURE(status) || name == NULL) {
+ name = "unknown";
+ status = U_ZERO_ERRORZ; /* Reset on failure */
+ }
+ Tcl_ListObjAppendElement(
+ NULL, resultObj, Tcl_NewStringObj(name, -1));
+ }
+ Tcl_SetObjResult(interp, resultObj);
+ ret = TCL_OK;
+ }
+
+ ucsdet_close(csd);
+ return ret;
+}
+
+static int DetectableEncodings(Tcl_Interp *interp)
+{
+ if (ucsdet_open == NULL || ucsdet_getAllDetectableCharsets == NULL ||
+ ucsdet_close == NULL || uenum_next == NULL || uenum_count == NULL ||
+ uenum_close == NULL) {
+ return FunctionNotAvailableError(interp);
+ }
+ UErrorCodex status = U_ZERO_ERRORZ;
+
+ UCharsetDetector* csd = ucsdet_open(&status);
+ if (U_FAILURE(status)) {
+ return IcuError(interp, "Could not open charset detector.", status);
+ }
+
+ int ret;
+ UEnumeration *enumerator = ucsdet_getAllDetectableCharsets(csd, &status);
+ if (U_FAILURE(status) || enumerator == NULL) {
+ IcuError(interp, "Could not get list of detectable encodings.", status);
+ ret = TCL_ERROR;
+ } else {
+ int32_t count;
+ count = uenum_count(enumerator, &status);
+ if (U_FAILURE(status)) {
+ IcuError(interp, "Could not get charset enumerator count.", status);
+ ret = TCL_ERROR;
+ } else {
+ int i;
+ Tcl_Obj *resultObj = Tcl_NewListObj(0, NULL);
+ for (i = 0; i < count; ++i) {
+ const char *name;
+ int32_t len;
+ name = uenum_next(enumerator, &len, &status);
+ if (name == NULL || U_FAILURE(status)) {
+ name = "unknown";
+ len = 7;
+ status = U_ZERO_ERRORZ; /* Reset on error */
+ }
+ Tcl_ListObjAppendElement(
+ interp, resultObj, Tcl_NewStringObj(name, len));
+ }
+ Tcl_SetObjResult(interp, resultObj);
+ ret = TCL_OK;
+ }
+ uenum_close(enumerator);
+ }
+
+ ucsdet_close(csd);
+ return ret;
+}
+
+/*
+ *------------------------------------------------------------------------
+ *
+ * EncodingDetectObjCmd --
+ *
+ * Implements the Tcl command EncodingDetect.
+ * encdetect - returns names of all detectable encodings
+ * encdetect BYTES ?-all? - return detected encoding(s)
+ *
+ * Results:
+ * TCL_OK - Success.
+ * TCL_ERROR - Error.
+ *
+ * Side effects:
+ * Interpreter result holds result or error message.
+ *
+ *------------------------------------------------------------------------
+ */
+static int
+IcuDetectObjCmd(
+ TCL_UNUSED(void *),
+ Tcl_Interp *interp,
+ int objc,
+ Tcl_Obj *const objv[])
+{
+ if (objc > 3) {
+ Tcl_WrongNumArgs(interp, 1 , objv, "?bytes ?-all??");
+ return TCL_ERROR;
+ }
+
+ if (objc == 1) {
+ return DetectableEncodings(interp);
+ }
+
+ int all = 0;
+ if (objc == 3) {
+ if (strcmp("-all", Tcl_GetString(objv[2]))) {
+ Tcl_SetObjResult(
+ interp,
+ Tcl_ObjPrintf("Invalid option %s, must be \"-all\"",
+ Tcl_GetString(objv[2])));
+ return TCL_ERROR;
+ }
+ all = 1;
+ }
+
+ return DetectEncoding(interp, objv[1], all);
+}
+
+/*
+ *------------------------------------------------------------------------
+ *
+ * IcuConverterNamesObjCmd --
+ *
+ * Sets interp result to list of available ICU converters.
+ *
+ * Results:
+ * TCL_OK - Success.
+ * TCL_ERROR - Error.
+ *
+ * Side effects:
+ * Interpreter result holds list of converter names.
+ *
+ *------------------------------------------------------------------------
+ */
+static int
+IcuConverterNamesObjCmd (
+ TCL_UNUSED(void *),
+ Tcl_Interp *interp, /* Current interpreter. */
+ int objc, /* Number of arguments. */
+ Tcl_Obj *const objv[]) /* Argument objects. */
+{
+
+ if (objc != 1) {
+ Tcl_WrongNumArgs(interp, 1 , objv, "");
+ return TCL_ERROR;
+ }
+ if (ucnv_countAvailable == NULL || ucnv_getAvailableName == NULL) {
+ return FunctionNotAvailableError(interp);
+ }
+
+ int32_t count = ucnv_countAvailable();
+ if (count <= 0) {
+ return TCL_OK;
+ }
+ Tcl_Obj *resultObj = Tcl_NewListObj(count, NULL);
+ int32_t i;
+ for (i = 0; i < count; ++i) {
+ const char *name = ucnv_getAvailableName(i);
+ if (name) {
+ Tcl_ListObjAppendElement(
+ NULL, resultObj, Tcl_NewStringObj(name, -1));
+ }
+ }
+ Tcl_SetObjResult(interp, resultObj);
+ return TCL_OK;
+}
+
+/*
+ *------------------------------------------------------------------------
+ *
+ * IcuConverterAliasesObjCmd --
+ *
+ * Sets interp result to list of available ICU converters.
+ *
+ * Results:
+ * TCL_OK - Success.
+ * TCL_ERROR - Error.
+ *
+ * Side effects:
+ * Interpreter result holds list of converter names.
+ *
+ *------------------------------------------------------------------------
+ */
+static int
+IcuConverterAliasesObjCmd (
+ TCL_UNUSED(void *),
+ Tcl_Interp *interp, /* Current interpreter. */
+ int objc, /* Number of arguments. */
+ Tcl_Obj *const objv[]) /* Argument objects. */
+{
+ if (objc != 2) {
+ Tcl_WrongNumArgs(interp, 1 , objv, "convertername");
+ return TCL_ERROR;
+ }
+ if (ucnv_countAliases == NULL || ucnv_getAlias == NULL) {
+ return FunctionNotAvailableError(interp);
+ }
+
+ const char *name = Tcl_GetString(objv[1]);
+ UErrorCodex status = U_ZERO_ERRORZ;
+ uint16_t count = ucnv_countAliases(name, &status);
+ if (status != U_AMBIGUOUS_ALIAS_WARNING && U_FAILURE(status)) {
+ return IcuError(interp, "Could not get aliases.", status);
+ }
+ if (count <= 0) {
+ return TCL_OK;
+ }
+ Tcl_Obj *resultObj = Tcl_NewListObj(count, NULL);
+ uint16_t i;
+ for (i = 0; i < count; ++i) {
+ status = U_ZERO_ERRORZ; /* Reset in case U_AMBIGUOUS_ALIAS_WARNING */
+ const char *aliasName = ucnv_getAlias(name, i, &status);
+ if (status != U_AMBIGUOUS_ALIAS_WARNING && U_FAILURE(status)) {
+ status = U_ZERO_ERRORZ; /* Reset error for next iteration */
+ continue;
+ }
+ if (aliasName) {
+ Tcl_ListObjAppendElement(
+ NULL, resultObj, Tcl_NewStringObj(aliasName, -1));
+ }
+ }
+ Tcl_SetObjResult(interp, resultObj);
+ return TCL_OK;
+}
+
+static void
+TclIcuCleanup(
+ TCL_UNUSED(void *))
+{
+ Tcl_MutexLock(&icu_mutex);
+ if (icu_fns.nopen-- <= 1) {
+ int i;
+ if (u_cleanup != NULL) {
+ u_cleanup();
+ }
+ for (i = 0; i < (int)(sizeof(icu_fns.libs) / sizeof(icu_fns.libs[0]));
+ ++i) {
+ if (icu_fns.libs[i] != NULL) {
+ Tcl_FSUnloadFile(NULL, icu_fns.libs[i]);
+ }
+ }
+ memset(&icu_fns, 0, sizeof(icu_fns));
+ }
+ Tcl_MutexUnlock(&icu_mutex);
+}
+
+static void
+TclIcuInit(
+ Tcl_Interp *interp)
+{
+ Tcl_MutexLock(&icu_mutex);
+ char symbol[256];
+ char icuversion[4] = "_80"; /* Highest ICU version + 1 */
+
+ /*
+ * The initialization below clones the existing one from Tk. May need
+ * revisiting.
+ * ICU shared library names as well as function names *may* be versioned.
+ * See https://unicode-org.github.io/icu/userguide/icu4c/packaging.html
+ * for the gory details.
+ */
+ if (icu_fns.nopen == 0) {
+ int i = 0;
+ Tcl_Obj *nameobj;
+ static const char *iculibs[] = {
+#if defined(_WIN32)
+# define DLLNAME "icu%s%s.dll"
+ "icuuc??.dll", /* Windows, user-provided */
+ NULL,
+ "cygicuuc??.dll", /* When running under Cygwin */
+#elif defined(__CYGWIN__)
+# define DLLNAME "cygicu%s%s.dll"
+ "cygicuuc??.dll",
+#elif defined(MAC_OSX_TCL)
+# define DLLNAME "libicu%s.%s.dylib"
+ "libicuuc.??.dylib",
+#else
+# define DLLNAME "libicu%s.so.%s"
+ "libicuuc.so.??",
+#endif
+ NULL
+ };
+
+ /* Going back down to ICU version 60 */
+ while ((icu_fns.libs[0] == NULL) && (icuversion[1] >= '6')) {
+ if (--icuversion[2] < '0') {
+ icuversion[1]--; icuversion[2] = '9';
+ }
+#if defined(__CYGWIN__)
+ i = 2;
+#else
+ i = 0;
+#endif
+ while (iculibs[i] != NULL) {
+ Tcl_ResetResult(interp);
+ nameobj = Tcl_NewStringObj(iculibs[i], TCL_INDEX_NONE);
+ char *nameStr = Tcl_GetString(nameobj);
+ char *p = strchr(nameStr, '?');
+ if (p != NULL) {
+ memcpy(p, icuversion+1, 2);
+ }
+ Tcl_IncrRefCount(nameobj);
+ if (Tcl_LoadFile(interp, nameobj, NULL, 0, NULL, &icu_fns.libs[0])
+ == TCL_OK) {
+ if (p == NULL) {
+ icuversion[0] = '\0';
+ }
+ Tcl_DecrRefCount(nameobj);
+ break;
+ }
+ Tcl_DecrRefCount(nameobj);
+ ++i;
+ }
+ }
+ if (icu_fns.libs[0] != NULL) {
+ /* Loaded icuuc, load others with the same version */
+ nameobj = Tcl_ObjPrintf(DLLNAME, "i18n", icuversion+1);
+ Tcl_IncrRefCount(nameobj);
+ /* Ignore errors. Calls to contained functions will fail. */
+ (void) Tcl_LoadFile(interp, nameobj, NULL, 0, NULL, &icu_fns.libs[1]);
+ Tcl_DecrRefCount(nameobj);
+ }
+#if defined(_WIN32)
+ /*
+ * On Windows, if no ICU install found, look for the system's
+ * (Win10 1703 or later). There are two cases. Newer systems
+ * have icu.dll containing all functions. Older systems have
+ * icucc.dll and icuin.dll
+ */
+ if (icu_fns.libs[0] == NULL) {
+ Tcl_ResetResult(interp);
+ nameobj = Tcl_NewStringObj("icu.dll", TCL_INDEX_NONE);
+ Tcl_IncrRefCount(nameobj);
+ if (Tcl_LoadFile(interp, nameobj, NULL, 0, NULL, &icu_fns.libs[0])
+ == TCL_OK) {
+ /* Reload same for second set of functions. */
+ (void) Tcl_LoadFile(interp, nameobj, NULL, 0, NULL, &icu_fns.libs[1]);
+ /* Functions do NOT have version suffixes */
+ icuversion[0] = '\0';
+ }
+ Tcl_DecrRefCount(nameobj);
+ }
+ if (icu_fns.libs[0] == NULL) {
+ /* No icu.dll. Try last fallback */
+ Tcl_ResetResult(interp);
+ nameobj = Tcl_NewStringObj("icuuc.dll", TCL_INDEX_NONE);
+ Tcl_IncrRefCount(nameobj);
+ if (Tcl_LoadFile(interp, nameobj, NULL, 0, NULL, &icu_fns.libs[0])
+ == TCL_OK) {
+ Tcl_DecrRefCount(nameobj);
+ nameobj = Tcl_NewStringObj("icuin.dll", TCL_INDEX_NONE);
+ Tcl_IncrRefCount(nameobj);
+ (void) Tcl_LoadFile(interp, nameobj, NULL, 0, NULL, &icu_fns.libs[1]);
+ /* Functions do NOT have version suffixes */
+ icuversion[0] = '\0';
+ }
+ Tcl_DecrRefCount(nameobj);
+ }
+#endif
+
+#define ICUUC_SYM(name) \
+ strcpy(symbol, #name ); \
+ strcat(symbol, icuversion); \
+ icu_fns._##name = (fn_ ## name) \
+ Tcl_FindSymbol(NULL, icu_fns.libs[0], symbol)
+ if (icu_fns.libs[0] != NULL) {
+ ICUUC_SYM(u_cleanup);
+ ICUUC_SYM(u_errorName);
+
+ ICUUC_SYM(ucnv_countAliases);
+ ICUUC_SYM(ucnv_countAvailable);
+ ICUUC_SYM(ucnv_getAlias);
+ ICUUC_SYM(ucnv_getAvailableName);
+
+ ICUUC_SYM(ubrk_open);
+ ICUUC_SYM(ubrk_close);
+ ICUUC_SYM(ubrk_preceding);
+ ICUUC_SYM(ubrk_following);
+ ICUUC_SYM(ubrk_previous);
+ ICUUC_SYM(ubrk_next);
+ ICUUC_SYM(ubrk_setText);
+
+ ICUUC_SYM(uenum_close);
+ ICUUC_SYM(uenum_count);
+ ICUUC_SYM(uenum_next);
+
+#undef ICUUC_SYM
+ }
+
+#define ICUIN_SYM(name) \
+ strcpy(symbol, #name ); \
+ strcat(symbol, icuversion); \
+ icu_fns._##name = (fn_ ## name) \
+ Tcl_FindSymbol(NULL, icu_fns.libs[1], symbol)
+ if (icu_fns.libs[1] != NULL) {
+ ICUIN_SYM(ucsdet_close);
+ ICUIN_SYM(ucsdet_detect);
+ ICUIN_SYM(ucsdet_detectAll);
+ ICUIN_SYM(ucsdet_getName);
+ ICUIN_SYM(ucsdet_getAllDetectableCharsets);
+ ICUIN_SYM(ucsdet_open);
+ ICUIN_SYM(ucsdet_setText);
+#undef ICUIN_SYM
+ }
+
+ }
+#undef ICU_SYM
+
+ Tcl_MutexUnlock(&icu_mutex);
+
+ if (icu_fns.libs[0] != NULL) {
+ /*
+ * Note refcounts updated BEFORE command definition to protect
+ * against self redefinition.
+ */
+ if (icu_fns.libs[1] != NULL) {
+ /* Commands needing both libraries */
+
+ /* Ref count number of commands */
+ icu_fns.nopen += 1;
+ Tcl_CreateObjCommand(interp,
+ "::tcl::unsupported::icu::detect",
+ IcuDetectObjCmd,
+ 0,
+ TclIcuCleanup);
+ }
+ /* Commands needing only libs[0] (icuuc) */
+
+ /* Ref count number of commands */
+ icu_fns.nopen += 2;
+ Tcl_CreateObjCommand(interp,
+ "::tcl::unsupported::icu::converters",
+ IcuConverterNamesObjCmd,
+ 0,
+ TclIcuCleanup);
+ Tcl_CreateObjCommand(interp,
+ "::tcl::unsupported::icu::aliases",
+ IcuConverterAliasesObjCmd,
+ 0,
+ TclIcuCleanup);
+ }
+}
+
+/*
+ *------------------------------------------------------------------------
+ *
+ * TclLoadIcuObjCmd --
+ *
+ * Loads and initializes ICU
+ *
+ * Results:
+ * TCL_OK - Success.
+ * TCL_ERROR - Error.
+ *
+ * Side effects:
+ * Interpreter result holds result or error message.
+ *
+ *------------------------------------------------------------------------
+ */
+int
+TclLoadIcuObjCmd (
+ TCL_UNUSED(void *),
+ Tcl_Interp *interp, /* Current interpreter. */
+ int objc, /* Number of arguments. */
+ Tcl_Obj *const objv[]) /* Argument objects. */
+{
+ if (objc != 1) {
+ Tcl_WrongNumArgs(interp, 1 , objv, "");
+ return TCL_ERROR;
+ }
+ TclIcuInit(interp);
+ return TCL_OK;
+}
+
+/*
+ * Local Variables:
+ * mode: c
+ * c-basic-offset: 4
+ * fill-column: 78
+ * coding: utf-8
+ * End:
+ */
diff --git a/generic/tclInt.h b/generic/tclInt.h
index b34ca35..9e956dc 100644
--- a/generic/tclInt.h
+++ b/generic/tclInt.h
@@ -3556,6 +3556,7 @@ MODULE_SCOPE int TclDictWithFinish(Tcl_Interp *interp, Var *varPtr,
MODULE_SCOPE Tcl_Obj * TclDictWithInit(Tcl_Interp *interp, Tcl_Obj *dictPtr,
Tcl_Size pathc, Tcl_Obj *const pathv[]);
MODULE_SCOPE Tcl_ObjCmdProc Tcl_DisassembleObjCmd;
+MODULE_SCOPE Tcl_ObjCmdProc TclLoadIcuObjCmd;
/* Assemble command function */
MODULE_SCOPE Tcl_ObjCmdProc Tcl_AssembleObjCmd;
diff --git a/library/icu.tcl b/library/icu.tcl
new file mode 100644
index 0000000..827fd04
--- /dev/null
+++ b/library/icu.tcl
@@ -0,0 +1,145 @@
+#----------------------------------------------------------------------
+#
+# icu.tcl --
+#
+# This file implements the portions of the [tcl::unsupported::icu]
+# ensemble that are coded in Tcl.
+#
+#----------------------------------------------------------------------
+#
+# Copyright © 2024 Ashok P. Nadkarni
+# See the file "license.terms" for information on usage and redistribution
+# of this file, and for a DISCLAIMER OF ALL WARRANTIES.
+#
+#----------------------------------------------------------------------
+
+::tcl::unsupported::loadIcu
+
+namespace eval ::tcl::unsupported::icu {
+ # Map Tcl encoding names to ICU and back. Note ICU has multiple aliases
+ # for the same encoding.
+ variable tclToIcu
+ variable icuToTcl
+
+ proc LogError {message} {
+ puts stderr $message
+ }
+
+ proc Init {} {
+ variable tclToIcu
+ variable icuToTcl
+ # There are some special cases where names do not line up
+ # at all. Map Tcl -> ICU
+ array set specialCases {
+ ebcdic ebcdic-cp-us
+ macCentEuro maccentraleurope
+ utf16 UTF16_PlatformEndian
+ utf-16be UnicodeBig
+ utf-16le UnicodeLittle
+ utf32 UTF32_PlatformEndian
+ }
+ # Ignore all errors. Do not want to hold up Tcl
+ # if ICU not available
+ if {[catch {
+ foreach tclName [encoding names] {
+ if {[catch {
+ set icuNames [aliases $tclName]
+ } erMsg]} {
+ LogError "Could not get aliases for $tclName: $erMsg"
+ continue
+ }
+ if {[llength $icuNames] == 0} {
+ # E.g. macGreek -> x-MacGreek
+ set icuNames [aliases x-$tclName]
+ if {[llength $icuNames] == 0} {
+ # Still no joy, check for special cases
+ if {[info exists specialCases($tclName)]} {
+ set icuNames [aliases $specialCases($tclName)]
+ }
+ }
+ }
+ # If the Tcl name is also an ICU name use it else use
+ # the first name which is the canonical ICU name
+ set pos [lsearch -exact -nocase $icuNames $tclName]
+ if {$pos >= 0} {
+ lappend tclToIcu($tclName) [lindex $icuNames $pos] {*}[lreplace $icuNames $pos $pos]
+ } else {
+ set tclToIcu($tclName) $icuNames
+ }
+ foreach icuName $icuNames {
+ lappend icuToTcl($icuName) $tclName
+ }
+ }
+ } errMsg]} {
+ LogError $errMsg
+ }
+ array default set tclToIcu ""
+ array default set icuToTcl ""
+
+ # Redefine ourselves to no-op.
+ proc Init {} {}
+ }
+ # Primarily used during development
+ proc MappedIcuNames {{pat *}} {
+ Init
+ variable icuToTcl
+ return [array names icuToTcl $pat]
+ }
+ # Primarily used during development
+ proc UnmappedIcuNames {{pat *}} {
+ Init
+ variable icuToTcl
+ set unmappedNames {}
+ foreach icuName [converters] {
+ if {[llength [icuToTcl $icuName]] == 0} {
+ lappend unmappedNames $icuName
+ }
+ foreach alias [aliases $icuName] {
+ if {[llength [icuToTcl $alias]] == 0} {
+ lappend unmappedNames $alias
+ }
+ }
+ }
+ # Aliases can be duplicates. Remove
+ return [lsort -unique [lsearch -inline -all $unmappedNames $pat]]
+ }
+ # Primarily used during development
+ proc UnmappedTclNames {{pat *}} {
+ Init
+ variable tclToIcu
+ set unmappedNames {}
+ foreach tclName [encoding names] {
+ # Note entry will always exist. Check if empty
+ if {[llength [tclToIcu $tclName]] == 0} {
+ lappend unmappedNames $tclName
+ }
+ }
+ return [lsearch -inline -all $unmappedNames $pat]
+ }
+
+ # Returns the Tcl equivalent of an ICU encoding name or
+ # the empty string in case not found.
+ proc icuToTcl {icuName} {
+ Init
+ proc icuToTcl {icuName} {
+ variable icuToTcl
+ return [lindex $icuToTcl($icuName) 0]
+ }
+ icuToTcl $icuName
+ }
+
+ # Returns the ICU equivalent of an Tcl encoding name or
+ # the empty string in case not found.
+ proc tclToIcu {tclName} {
+ Init
+ proc tclToIcu {tclName} {
+ variable tclToIcu
+ return [lindex $tclToIcu($tclName) 0]
+ }
+ tclToIcu $tclName
+ }
+
+
+ namespace export {[a-z]*}
+ namespace ensemble create
+}
diff --git a/library/tclIndex b/library/tclIndex
index 871298f..2b5619f 100644
--- a/library/tclIndex
+++ b/library/tclIndex
@@ -109,3 +109,4 @@ set auto_index(tcl_endOfWord) [list ::tcl::Pkg::source [file join $dir word.tcl]
set auto_index(tcl_startOfNextWord) [list ::tcl::Pkg::source [file join $dir word.tcl]]
set auto_index(tcl_startOfPreviousWord) [list ::tcl::Pkg::source [file join $dir word.tcl]]
set auto_index(writeFile) [list ::tcl::Pkg::source [file join $dir writefile.tcl]]
+set auto_index(::tcl::unsupported::icu) [list ::tcl::Pkg::source [file join $dir icu.tcl]] \ No newline at end of file
diff --git a/tests/icu.test b/tests/icu.test
new file mode 100644
index 0000000..522ed53
--- /dev/null
+++ b/tests/icu.test
@@ -0,0 +1,51 @@
+# Tests for tcl::unsupported::icu
+
+if {"::tcltest" ni [namespace children]} {
+ package require tcltest
+ namespace import -force ::tcltest::*
+}
+
+# Force late loading of ICU if present
+catch {::tcl::unsupported::icu}
+testConstraint icu [expr {[info commands ::tcl::unsupported::icu::detect] ne ""}]
+
+namespace eval icu {
+ test icu-detect-0 {Return list of ICU encodings} -constraints icu -body {
+ set encoders [::tcl::unsupported::icu detect]
+ list [::tcl::mathop::in UTF-8 $encoders] [::tcl::mathop::in ISO-8859-1 $encoders]
+ } -result {1 1}
+
+ test icu-detect-1 {Guess encoding} -constraints icu -body {
+ ::tcl::unsupported::icu detect [readFile [info script]]
+ } -result ISO-8859-1
+
+ test icu-detect-2 {Get all possible encodings} -constraints icu -body {
+ set encodings [::tcl::unsupported::icu detect [readFile [info script]] -all]
+ list [::tcl::mathop::in UTF-8 $encodings] [::tcl::mathop::in ISO-8859-1 $encodings]
+ } -result {1 1}
+
+ test icu-tclToIcu-0 {Map Tcl encoding} -constraints icu -body {
+ # tis-620 because it is ambiguous in ICU on some platforms
+ # but should return the preferred encoding
+ list [::tcl::unsupported::icu tclToIcu utf-8] [::tcl::unsupported::icu tclToIcu tis-620] [::tcl::unsupported::icu tclToIcu shiftjis]
+ } -result {UTF-8 TIS-620 ibm-943_P15A-2003}
+
+ test icu-tclToIcu-1 {Map Tcl encoding - no map} -constraints icu -body {
+ # Should not raise an error
+ ::tcl::unsupported::icu tclToIcu dummy
+ } -result {}
+
+ test icu-icuToTcl-0 {Map ICU encoding} -constraints icu -body {
+ list [::tcl::unsupported::icu icuToTcl UTF-8] [::tcl::unsupported::icu icuToTcl TIS-620] [::tcl::unsupported::icu icuToTcl ibm-943_P15A-2003]
+ } -result {utf-8 tis-620 cp932}
+
+ test icu-icuToTcl-1 {Map ICU encoding - no map} -constraints icu -body {
+ # Should not raise an error
+ ::tcl::unsupported::icu icuToTcl dummy
+ } -result {}
+
+}
+
+
+namespace delete icu
+::tcltest::cleanupTests
diff --git a/unix/Makefile.in b/unix/Makefile.in
index 9610165..71c7c93 100644
--- a/unix/Makefile.in
+++ b/unix/Makefile.in
@@ -305,7 +305,8 @@ GENERIC_OBJS = regcomp.o regexec.o regfree.o regerror.o tclAlloc.o \
tclCompile.o tclConfig.o tclDate.o tclDictObj.o tclDisassemble.o \
tclEncoding.o tclEnsemble.o \
tclEnv.o tclEvent.o tclExecute.o tclFCmd.o tclFileName.o tclGet.o \
- tclHash.o tclHistory.o tclIndexObj.o tclInterp.o tclIO.o tclIOCmd.o \
+ tclHash.o tclHistory.o \
+ tclIcu.o tclIndexObj.o tclInterp.o tclIO.o tclIOCmd.o \
tclIORChan.o tclIORTrans.o tclIOGT.o tclIOSock.o tclIOUtil.o \
tclLink.o tclListObj.o \
tclLiteral.o tclLoad.o tclMain.o tclNamesp.o tclNotify.o \
@@ -431,6 +432,7 @@ GENERIC_SRCS = \
$(GENERIC_DIR)/tclGet.c \
$(GENERIC_DIR)/tclHash.c \
$(GENERIC_DIR)/tclHistory.c \
+ $(GENERIC_DIR)/tclIcu.c \
$(GENERIC_DIR)/tclIndexObj.c \
$(GENERIC_DIR)/tclInterp.c \
$(GENERIC_DIR)/tclIO.c \
@@ -1371,6 +1373,9 @@ tclHash.o: $(GENERIC_DIR)/tclHash.c
tclHistory.o: $(GENERIC_DIR)/tclHistory.c
$(CC) -c $(CC_SWITCHES) $(GENERIC_DIR)/tclHistory.c
+tclIcu.o: $(GENERIC_DIR)/tclIcu.c
+ $(CC) -c $(CC_SWITCHES) $(GENERIC_DIR)/tclIcu.c
+
tclIndexObj.o: $(GENERIC_DIR)/tclIndexObj.c
$(CC) -c $(CC_SWITCHES) $(GENERIC_DIR)/tclIndexObj.c
diff --git a/win/Makefile.in b/win/Makefile.in
index 391ec58..69421f0 100644
--- a/win/Makefile.in
+++ b/win/Makefile.in
@@ -313,6 +313,7 @@ GENERIC_OBJS = \
tclGet.$(OBJEXT) \
tclHash.$(OBJEXT) \
tclHistory.$(OBJEXT) \
+ tclIcu.$(OBJEXT) \
tclIndexObj.$(OBJEXT) \
tclInterp.$(OBJEXT) \
tclIO.$(OBJEXT) \
diff --git a/win/makefile.vc b/win/makefile.vc
index 74a594c..c02465ff 100644
--- a/win/makefile.vc
+++ b/win/makefile.vc
@@ -278,6 +278,7 @@ COREOBJS = \
$(TMP_DIR)\tclGet.obj \
$(TMP_DIR)\tclHash.obj \
$(TMP_DIR)\tclHistory.obj \
+ $(TMP_DIR)\tclIcu.obj \
$(TMP_DIR)\tclIndexObj.obj \
$(TMP_DIR)\tclInterp.obj \
$(TMP_DIR)\tclIO.obj \