diff options
Diffstat (limited to 'unix/tclUnixInit.c')
-rw-r--r-- | unix/tclUnixInit.c | 523 |
1 files changed, 275 insertions, 248 deletions
diff --git a/unix/tclUnixInit.c b/unix/tclUnixInit.c index 3592c17..4dd3208 100644 --- a/unix/tclUnixInit.c +++ b/unix/tclUnixInit.c @@ -7,7 +7,7 @@ * Copyright (c) 1999 by Scriptics Corporation. * All rights reserved. * - * RCS: @(#) $Id: tclUnixInit.c,v 1.52 2004/11/22 22:13:40 dgp Exp $ + * RCS: @(#) $Id: tclUnixInit.c,v 1.53 2004/11/30 19:34:51 dgp Exp $ */ #include "tclInt.h" @@ -93,11 +93,6 @@ static Tcl_ThreadDataKey dataKey; #define STACK_DEBUG(args) (void)0 #endif /* TCL_DEBUG_STACK_CHECK */ -/* Used to store the encoding used for binary files */ -static Tcl_Encoding binaryEncoding = NULL; -/* Has the basic library path encoding issue been fixed */ -static int libraryPathEncodingFixed = 0; - /* * Tcl tries to use standard and homebrew methods to guess the right * encoding on the platform. However, there is always a final fallback, @@ -137,6 +132,147 @@ typedef struct LocaleTable { } LocaleTable; static CONST LocaleTable localeTable[] = { + /* First list all the encoding files installed with Tcl */ + {"ascii", "ascii"}, + {"big5", "big5"}, + {"cp1250", "cp1250"}, + {"cp1251", "cp1251"}, + {"cp1252", "cp1252"}, + {"cp1253", "cp1253"}, + {"cp1254", "cp1254"}, + {"cp1255", "cp1255"}, + {"cp1256", "cp1256"}, + {"cp1257", "cp1257"}, + {"cp1258", "cp1258"}, + {"cp437", "cp437"}, + {"cp737", "cp737"}, + {"cp775", "cp775"}, + {"cp850", "cp850"}, + {"cp852", "cp852"}, + {"cp855", "cp855"}, + {"cp857", "cp857"}, + {"cp860", "cp860"}, + {"cp861", "cp861"}, + {"cp862", "cp862"}, + {"cp863", "cp863"}, + {"cp864", "cp864"}, + {"cp865", "cp865"}, + {"cp866", "cp866"}, + {"cp869", "cp869"}, + {"cp874", "cp874"}, + {"cp932", "cp932"}, + {"cp936", "cp936"}, + {"cp949", "cp949"}, + {"cp950", "cp950"}, + {"dingbats", "dingbats"}, + {"ebcdic", "ebcdic"}, + {"euc-cn", "euc-cn"}, + {"euc-jp", "euc-jp"}, + {"euc-kr", "euc-kr"}, + {"gb12345", "gb12345"}, + {"gb1988", "gb1988"}, + {"gb2312-raw", "gb2312-raw"}, + {"gb2312", "gb2312"}, + {"iso2022-jp", "iso2022-jp"}, + {"iso2022-kr", "iso2022-kr"}, + {"iso2022", "iso2022"}, + {"iso8859-1", "iso8859-1"}, + {"iso8859-10", "iso8859-10"}, + {"iso8859-13", "iso8859-13"}, + {"iso8859-14", "iso8859-14"}, + {"iso8859-15", "iso8859-15"}, + {"iso8859-16", "iso8859-16"}, + {"iso8859-2", "iso8859-2"}, + {"iso8859-3", "iso8859-3"}, + {"iso8859-4", "iso8859-4"}, + {"iso8859-5", "iso8859-5"}, + {"iso8859-6", "iso8859-6"}, + {"iso8859-7", "iso8859-7"}, + {"iso8859-8", "iso8859-8"}, + {"iso8859-9", "iso8859-9"}, + {"jis0201", "jis0201"}, + {"jis0208", "jis0208"}, + {"jis0212", "jis0212"}, + {"koi8-r", "koi8-r"}, + {"koi8-u", "koi8-u"}, + {"ksc5601", "ksc5601"}, + {"macCentEuro", "macCentEuro"}, + {"macCroatian", "macCroatian"}, + {"macCyrillic", "macCyrillic"}, + {"macDingbats", "macDingbats"}, + {"macGreek", "macGreek"}, + {"macIceland", "macIceland"}, + {"macJapan", "macJapan"}, + {"macRoman", "macRoman"}, + {"macRomania", "macRomania"}, + {"macThai", "macThai"}, + {"macTurkish", "macTurkish"}, + {"macUkraine", "macUkraine"}, + {"shiftjis", "shiftjis"}, + {"symbol", "symbol"}, + {"tis-620", "tis-620"}, + /* Next list a few common variants */ + {"maccenteuro", "macCentEuro"}, + {"maccroatian", "macCroatian"}, + {"maccyrillic", "macCyrillic"}, + {"macdingbats", "macDingbats"}, + {"macgreek", "macGreek"}, + {"maciceland", "macIceland"}, + {"macjapan", "macJapan"}, + {"macroman", "macRoman"}, + {"macromania", "macRomania"}, + {"macthai", "macThai"}, + {"macturkish", "macTurkish"}, + {"macukraine", "macUkraine"}, + {"iso-2022-jp", "iso2022-jp"}, + {"iso-2022-kr", "iso2022-kr"}, + {"iso-2022", "iso2022"}, + {"iso-8859-1", "iso8859-1"}, + {"iso-8859-10", "iso8859-10"}, + {"iso-8859-13", "iso8859-13"}, + {"iso-8859-14", "iso8859-14"}, + {"iso-8859-15", "iso8859-15"}, + {"iso-8859-16", "iso8859-16"}, + {"iso-8859-2", "iso8859-2"}, + {"iso-8859-3", "iso8859-3"}, + {"iso-8859-4", "iso8859-4"}, + {"iso-8859-5", "iso8859-5"}, + {"iso-8859-6", "iso8859-6"}, + {"iso-8859-7", "iso8859-7"}, + {"iso-8859-8", "iso8859-8"}, + {"iso-8859-9", "iso8859-9"}, + {"ibm1250", "cp1250"}, + {"ibm1251", "cp1251"}, + {"ibm1252", "cp1252"}, + {"ibm1253", "cp1253"}, + {"ibm1254", "cp1254"}, + {"ibm1255", "cp1255"}, + {"ibm1256", "cp1256"}, + {"ibm1257", "cp1257"}, + {"ibm1258", "cp1258"}, + {"ibm437", "cp437"}, + {"ibm737", "cp737"}, + {"ibm775", "cp775"}, + {"ibm850", "cp850"}, + {"ibm852", "cp852"}, + {"ibm855", "cp855"}, + {"ibm857", "cp857"}, + {"ibm860", "cp860"}, + {"ibm861", "cp861"}, + {"ibm862", "cp862"}, + {"ibm863", "cp863"}, + {"ibm864", "cp864"}, + {"ibm865", "cp865"}, + {"ibm866", "cp866"}, + {"ibm869", "cp869"}, + {"ibm874", "cp874"}, + {"ibm932", "cp932"}, + {"ibm936", "cp936"}, + {"ibm949", "cp949"}, + {"ibm950", "cp950"}, + {"", "iso8859-1"}, + {"ansi_x3.4-1968", "iso8859-1"}, + /* Finally, the accumulated bug fixes... */ #ifdef HAVE_LANGINFO {"gb2312-1980", "gb2312"}, #ifdef __hpux @@ -280,6 +416,25 @@ TclpInitPlatform() */ (void) dlopen (NULL, RTLD_NOW); /* INTL: Native. */ #endif + /* + * Initialize the C library's locale subsystem. This is required + * for input methods to work properly on X11. We only do this for + * LC_CTYPE because that's the necessary one, and we don't want to + * affect LC_TIME here. The side effect of setting the default + * locale should be to load any locale specific modules that are + * needed by X. [BUG: 5422 3345 4236 2522 2521]. + */ + + setlocale(LC_CTYPE, ""); + + /* + * In case the initial locale is not "C", ensure that the numeric + * processing is done in "C" locale regardless. This is needed because + * Tcl relies on routines like strtod, but should not have locale + * dependent behavior. + */ + + setlocale(LC_NUMERIC, "C"); } /* @@ -287,47 +442,24 @@ TclpInitPlatform() * * TclpInitLibraryPath -- * - * Initialize the library path at startup. We have a minor - * metacircular problem that we don't know the encoding of the - * operating system but we may need to talk to operating system - * to find the library directories so that we know how to talk to - * the operating system. - * - * We do not know the encoding of the operating system. - * We do know that the encoding is some multibyte encoding. - * In that multibyte encoding, the characters 0..127 are equivalent - * to ascii. - * - * So although we don't know the encoding, it's safe: - * to look for the last slash character in a path in the encoding. - * to append an ascii string to a path. - * to pass those strings back to the operating system. - * - * But any strings that we remembered before we knew the encoding of - * the operating system must be translated to UTF-8 once we know the - * encoding so that the rest of Tcl can use those strings. - * - * This call sets the library path to strings in the unknown native - * encoding. TclpSetInitialEncodings() will translate the library - * path from the native encoding to UTF-8 as soon as it determines - * what the native encoding actually is. - * - * Called at process initialization time. + * This is the fallback routine that sets the library path + * if the application has not set one by the first time + * it is needed. * * Results: - * Return 1, indicating that the UTF may be dirty and require "cleanup" - * after encodings are initialized. + * None. * * Side effects: - * None. + * Sets the library path to an initial value. * - *--------------------------------------------------------------------------- - */ + *------------------------------------------------------------------------- + */ -int -TclpInitLibraryPath(path) -CONST char *path; /* Path to the executable in native - * multi-byte encoding. */ +void +TclpInitLibraryPath(valuePtr, lengthPtr, encodingPtr) + char **valuePtr; + int *lengthPtr; + Tcl_Encoding *encodingPtr; { #define LIBRARY_SIZE 32 Tcl_Obj *pathPtr, *objPtr; @@ -349,16 +481,6 @@ CONST char *path; /* Path to the executable in native sprintf(installLib, "lib/tcl%s", TCL_VERSION); /* - * Look for the library relative to default encoding dir. - */ - - str = Tcl_GetDefaultEncodingDir(); - if ((str != NULL) && (str[0] != '\0')) { - objPtr = Tcl_NewStringObj(str, -1); - Tcl_ListObjAppendElement(NULL, pathPtr, objPtr); - } - - /* * Look for the library relative to the TCL_LIBRARY env variable. * If the last dirname in the TCL_LIBRARY path does not match the * last dirname in the installLib variable, use the last dir name @@ -411,6 +533,7 @@ CONST char *path; /* Path to the executable in native } else #endif /* HAVE_CFBUNDLE */ { + /* TODO: Pull this value from the TIP 59 table */ str = defaultLibraryDir; } if (str[0] != '\0') { @@ -418,11 +541,13 @@ CONST char *path; /* Path to the executable in native Tcl_ListObjAppendElement(NULL, pathPtr, objPtr); } } - - TclSetLibraryPath(pathPtr); Tcl_DStringFree(&buffer); - return 1; /* 1 indicates that pathPtr may be dirty utf (needs cleaning) */ + *encodingPtr = Tcl_GetEncoding(NULL, NULL); + str = Tcl_GetStringFromObj(pathPtr, lengthPtr); + *valuePtr = ckalloc((unsigned int) (*lengthPtr)+1); + memcpy((VOID *) *valuePtr, (VOID *) str, (size_t)(*lengthPtr)+1); + Tcl_DecrRefCount(pathPtr); } /* @@ -452,223 +577,125 @@ CONST char *path; /* Path to the executable in native void TclpSetInitialEncodings() { - if (libraryPathEncodingFixed == 0) { - CONST char *encoding = NULL; - int i, setSysEncCode = TCL_ERROR; - Tcl_Obj *pathPtr; - - /* - * Determine the current encoding from the LC_* or LANG environment - * variables. We previously used setlocale() to determine the locale, - * but this does not work on some systems (e.g. Linux/i386 RH 5.0). - */ -#ifdef HAVE_LANGINFO - if (setlocale(LC_CTYPE, "") != NULL) { - Tcl_DString ds; - - /* - * Use a DString so we can overwrite it in name compatability - * checks below. - */ + Tcl_DString encodingName; + Tcl_SetSystemEncoding(NULL, + TclpGetEncodingNameFromEnvironment(&encodingName)); + Tcl_DStringFree(&encodingName); +} - Tcl_DStringInit(&ds); - encoding = Tcl_DStringAppend(&ds, nl_langinfo(CODESET), -1); +CONST char * +TclpGetEncodingNameFromEnvironment(bufPtr) + Tcl_DString *bufPtr; +{ + CONST char *encoding; + int i; - Tcl_UtfToLower(Tcl_DStringValue(&ds)); -#ifdef HAVE_LANGINFO_DEBUG - fprintf(stderr, "encoding '%s'", encoding); -#endif - if (encoding[0] == 'i' && encoding[1] == 's' && encoding[2] == 'o' - && encoding[3] == '-') { - char *p, *q; - /* need to strip '-' from iso-* encoding */ - for(p = Tcl_DStringValue(&ds)+3, q = Tcl_DStringValue(&ds)+4; - *p; *p++ = *q++); - } else if (encoding[0] == 'i' && encoding[1] == 'b' - && encoding[2] == 'm' && encoding[3] >= '0' - && encoding[3] <= '9') { - char *p, *q; - /* if langinfo reports "ibm*" we should use "cp*" */ - p = Tcl_DStringValue(&ds); - *p++ = 'c'; *p++ = 'p'; - for(q = p+1; *p ; *p++ = *q++); - } else if ((*encoding == '\0') - || !strcmp(encoding, "ansi_x3.4-1968")) { - /* Use iso8859-1 for empty or 'ansi_x3.4-1968' encoding */ - encoding = "iso8859-1"; - } -#ifdef HAVE_LANGINFO_DEBUG - fprintf(stderr, " ?%s?", encoding); -#endif - setSysEncCode = Tcl_SetSystemEncoding(NULL, encoding); - if (setSysEncCode != TCL_OK) { - /* - * If this doesn't return TCL_OK, the encoding returned by - * nl_langinfo or as we translated it wasn't accepted. Do - * this fallback check. If this fails, we will enter the - * old fallback below. - */ + Tcl_DStringInit(bufPtr); - for (i = 0; localeTable[i].lang != NULL; i++) { - if (strcmp(localeTable[i].lang, encoding) == 0) { - setSysEncCode = Tcl_SetSystemEncoding(NULL, - localeTable[i].encoding); - break; - } + /* + * Determine the current encoding from the LC_* or LANG environment + * variables. We previously used setlocale() to determine the locale, + * but this does not work on some systems (e.g. Linux/i386 RH 5.0). + */ +#ifdef HAVE_LANGINFO + if (setlocale(LC_CTYPE, "") != NULL) { + Tcl_DString ds; + + /* Use a DString so we can modify case. */ + Tcl_DStringInit(&ds); + encoding = Tcl_DStringAppend(&ds, nl_langinfo(CODESET), -1); + Tcl_UtfToLower(Tcl_DStringValue(&ds)); + /* Check whether it's a known encoding... */ + if (NULL == Tcl_GetEncoding(NULL, encoding)) { + /* ... or in the table if encodings we *should* know */ + for (i = 0; localeTable[i].lang != NULL; i++) { + if (strcmp(localeTable[i].lang, encoding) == 0) { + Tcl_DStringAppend(bufPtr, localeTable[i].encoding, -1); + break; } } -#ifdef HAVE_LANGINFO_DEBUG - fprintf(stderr, " => '%s'\n", encoding); -#endif - Tcl_DStringFree(&ds); + } else { + Tcl_DStringAppend(bufPtr, encoding, -1); } -#ifdef HAVE_LANGINFO_DEBUG - else { - fprintf(stderr, "setlocale returned NULL\n"); + Tcl_DStringFree(&ds); + if (Tcl_DStringLength(bufPtr)) { + return Tcl_DStringValue(bufPtr); } -#endif + } #endif /* HAVE_LANGINFO */ - if (setSysEncCode != TCL_OK) { - /* - * Classic fallback check. This tries a homebrew algorithm to - * determine what encoding should be used based on env vars. - */ - char *langEnv = getenv("LC_ALL"); - encoding = NULL; + /* + * Classic fallback check. This tries a homebrew algorithm to + * determine what encoding should be used based on env vars. + */ + encoding = getenv("LC_ALL"); - if (langEnv == NULL || langEnv[0] == '\0') { - langEnv = getenv("LC_CTYPE"); - } - if (langEnv == NULL || langEnv[0] == '\0') { - langEnv = getenv("LANG"); - } - if (langEnv == NULL || langEnv[0] == '\0') { - langEnv = NULL; - } + if (encoding == NULL || encoding[0] == '\0') { + encoding = getenv("LC_CTYPE"); + } + if (encoding == NULL || encoding[0] == '\0') { + encoding = getenv("LANG"); + } + if (encoding == NULL || encoding[0] == '\0') { + encoding = NULL; + } - if (langEnv != NULL) { - for (i = 0; localeTable[i].lang != NULL; i++) { - if (strcmp(localeTable[i].lang, langEnv) == 0) { - encoding = localeTable[i].encoding; - break; - } - } - /* - * There was no mapping in the locale table. If there is an - * encoding subfield, we can try to guess from that. - */ - - if (encoding == NULL) { - char *p; - for (p = langEnv; *p != '\0'; p++) { - if (*p == '.') { - p++; - break; - } - } - if (*p != '\0') { - Tcl_DString ds; - Tcl_DStringInit(&ds); - encoding = Tcl_DStringAppend(&ds, p, -1); - - Tcl_UtfToLower(Tcl_DStringValue(&ds)); - setSysEncCode = Tcl_SetSystemEncoding(NULL, encoding); - if (setSysEncCode != TCL_OK) { - encoding = NULL; - } - Tcl_DStringFree(&ds); - } - } -#ifdef HAVE_LANGINFO_DEBUG - fprintf(stderr, "encoding fallback check '%s' => '%s'\n", - langEnv, encoding); -#endif - } - if (setSysEncCode != TCL_OK) { - if (encoding == NULL) { - encoding = TCL_DEFAULT_ENCODING; - } + if (encoding != NULL) { + CONST char *p; - Tcl_SetSystemEncoding(NULL, encoding); + /* Check whether it's a known encoding... */ + if (NULL == Tcl_GetEncoding(NULL, encoding)) { + /* ... or in the table if encodings we *should* know */ + for (i = 0; localeTable[i].lang != NULL; i++) { + if (strcmp(localeTable[i].lang, encoding) == 0) { + Tcl_DStringAppend(bufPtr, localeTable[i].encoding, -1); + break; + } } - - /* - * Initialize the C library's locale subsystem. This is required - * for input methods to work properly on X11. We only do this for - * LC_CTYPE because that's the necessary one, and we don't want to - * affect LC_TIME here. The side effect of setting the default - * locale should be to load any locale specific modules that are - * needed by X. [BUG: 5422 3345 4236 2522 2521]. - * In HAVE_LANGINFO, this call is already done above. - */ -#ifndef HAVE_LANGINFO - setlocale(LC_CTYPE, ""); -#endif + } else { + Tcl_DStringAppend(bufPtr, encoding, -1); + } + if (Tcl_DStringLength(bufPtr)) { + return Tcl_DStringValue(bufPtr); } /* - * In case the initial locale is not "C", ensure that the numeric - * processing is done in "C" locale regardless. This is needed because - * Tcl relies on routines like strtod, but should not have locale - * dependent behavior. - */ - - setlocale(LC_NUMERIC, "C"); - - /* - * Until the system encoding was actually set, the library path was - * actually in the native multi-byte encoding, and not really UTF-8 - * as advertised. We cheated as follows: - * - * 1. It was safe to allow the Tcl_SetSystemEncoding() call to - * append the ASCII chars that make up the encoding's filename to - * the names (in the native encoding) of directories in the library - * path, since all Unix multi-byte encodings have ASCII in the - * beginning. - * - * 2. To open the encoding file, the native bytes in the file name - * were passed to the OS, without translating from UTF-8 to native, - * because the name was already in the native encoding. - * - * Now that the system encoding was actually successfully set, - * translate all the names in the library path to UTF-8. That way, - * next time we search the library path, we'll translate the names - * from UTF-8 to the system encoding which will be the native - * encoding. + * We didn't recognize the full value as an encoding name. + * If there is an encoding subfield, we can try to guess from that. */ - pathPtr = TclGetLibraryPath(); - if (pathPtr != NULL) { - int objc; - Tcl_Obj **objv; - - objc = 0; - Tcl_ListObjGetElements(NULL, pathPtr, &objc, &objv); - for (i = 0; i < objc; i++) { - int length; - char *string; - Tcl_DString ds; - - string = Tcl_GetStringFromObj(objv[i], &length); - Tcl_ExternalToUtfDString(NULL, string, length, &ds); - Tcl_SetStringObj(objv[i], Tcl_DStringValue(&ds), - Tcl_DStringLength(&ds)); - Tcl_DStringFree(&ds); + for (p = encoding; *p != '\0'; p++) { + if (*p == '.') { + p++; + break; } } + if (*p != '\0') { + Tcl_DString ds; + Tcl_DStringInit(&ds); + encoding = Tcl_DStringAppend(&ds, p, -1); + Tcl_UtfToLower(Tcl_DStringValue(&ds)); - libraryPathEncodingFixed = 1; - } + /* Check whether it's a known encoding... */ + if (NULL == Tcl_GetEncoding(NULL, encoding)) { + /* ... or in the table if encodings we *should* know */ + for (i = 0; localeTable[i].lang != NULL; i++) { + if (strcmp(localeTable[i].lang, encoding) == 0) { + Tcl_DStringAppend(bufPtr, localeTable[i].encoding, -1); + break; + } + } + } else { + Tcl_DStringAppend(bufPtr, encoding, -1); + } + Tcl_DStringFree(&ds); + if (Tcl_DStringLength(bufPtr)) { + return Tcl_DStringValue(bufPtr); + } - /* This is only ever called from the startup thread */ - if (binaryEncoding == NULL) { - /* - * Keep the iso8859-1 encoding preloaded. The IO package uses - * it for gets on a binary channel. - */ - binaryEncoding = Tcl_GetEncoding(NULL, "iso8859-1"); + } } + return Tcl_DStringAppend(bufPtr, TCL_DEFAULT_ENCODING, -1); } /* |