diff options
Diffstat (limited to 'generic/tclEncoding.c')
-rw-r--r-- | generic/tclEncoding.c | 2400 |
1 files changed, 1391 insertions, 1009 deletions
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 73c4067..fb2f134 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -5,43 +5,41 @@ * * Copyright (c) 1996-1998 Sun Microsystems, Inc. * - * See the file "license.terms" for information on usage and redistribution - * of this file, and for a DISCLAIMER OF ALL WARRANTIES. + * See the file "license.terms" for information on usage and redistribution of + * this file, and for a DISCLAIMER OF ALL WARRANTIES. */ #include "tclInt.h" -#include "tclPort.h" -typedef size_t (LengthProc)_ANSI_ARGS_((CONST char *src)); +typedef size_t (LengthProc)(const char *src); /* - * The following data structure represents an encoding, which describes how - * to convert between various character sets and UTF-8. + * The following data structure represents an encoding, which describes how to + * convert between various character sets and UTF-8. */ typedef struct Encoding { - char *name; /* Name of encoding. Malloced because (1) - * hash table entry that owns this encoding - * may be freed prior to this encoding being - * freed, (2) string passed in the - * Tcl_EncodingType structure may not be - * persistent. */ + char *name; /* Name of encoding. Malloced because (1) hash + * table entry that owns this encoding may be + * freed prior to this encoding being freed, + * (2) string passed in the Tcl_EncodingType + * structure may not be persistent. */ Tcl_EncodingConvertProc *toUtfProc; - /* Procedure to convert from external - * encoding into UTF-8. */ + /* Function to convert from external encoding + * into UTF-8. */ Tcl_EncodingConvertProc *fromUtfProc; - /* Procedure to convert from UTF-8 into + /* Function to convert from UTF-8 into * external encoding. */ Tcl_EncodingFreeProc *freeProc; - /* If non-NULL, procedure to call when this + /* If non-NULL, function to call when this * encoding is deleted. */ int nullSize; /* Number of 0x00 bytes that signify - * end-of-string in this encoding. This - * number is used to determine the source - * string length when the srcLen argument is - * negative. This number can be 1 or 2. */ + * end-of-string in this encoding. This number + * is used to determine the source string + * length when the srcLen argument is + * negative. This number can be 1 or 2. */ ClientData clientData; /* Arbitrary value associated with encoding - * type. Passed to conversion procedures. */ + * type. Passed to conversion functions. */ LengthProc *lengthProc; /* Function to compute length of * null-terminated strings in this encoding. * If nullSize is 1, this is strlen; if @@ -54,15 +52,15 @@ typedef struct Encoding { /* * The following structure is the clientData for a dynamically-loaded, - * table-driven encoding created by LoadTableEncoding(). It maps between + * table-driven encoding created by LoadTableEncoding(). It maps between * Unicode and a single-byte, double-byte, or multibyte (1 or 2 bytes only) * encoding. */ typedef struct TableEncodingData { - int fallback; /* Character (in this encoding) to - * substitute when this encoding cannot - * represent a UTF-8 character. */ + int fallback; /* Character (in this encoding) to substitute + * when this encoding cannot represent a UTF-8 + * character. */ char prefixBytes[256]; /* If a byte in the input stream is a lead * byte for a 2-byte sequence, the * corresponding entry in this array is 1, @@ -70,14 +68,15 @@ typedef struct TableEncodingData { unsigned short **toUnicode; /* Two dimensional sparse matrix to map * characters from the encoding to Unicode. * Each element of the toUnicode array points - * to an array of 256 shorts. If there is no + * to an array of 256 shorts. If there is no * corresponding character in Unicode, the - * value in the matrix is 0x0000. malloc'd. */ + * value in the matrix is 0x0000. + * malloc'd. */ unsigned short **fromUnicode; /* Two dimensional sparse matrix to map * characters from Unicode to the encoding. * Each element of the fromUnicode array - * points to an array of 256 shorts. If there + * points to an array of 256 shorts. If there * is no corresponding character the encoding, * the value in the matrix is 0x0000. * malloc'd. */ @@ -85,11 +84,11 @@ typedef struct TableEncodingData { /* * The following structures is the clientData for a dynamically-loaded, - * escape-driven encoding that is itself comprised of other simpler - * encodings. An example is "iso-2022-jp", which uses escape sequences to - * switch between ascii, jis0208, jis0212, gb2312, and ksc5601. Note that - * "escape-driven" does not necessarily mean that the ESCAPE character is - * the character used for switching character sets. + * escape-driven encoding that is itself comprised of other simpler encodings. + * An example is "iso-2022-jp", which uses escape sequences to switch between + * ascii, jis0208, jis0212, gb2312, and ksc5601. Note that "escape-driven" + * does not necessarily mean that the ESCAPE character is the character used + * for switching character sets. */ typedef struct EscapeSubTable { @@ -102,29 +101,29 @@ typedef struct EscapeSubTable { } EscapeSubTable; typedef struct EscapeEncodingData { - int fallback; /* Character (in this encoding) to - * substitute when this encoding cannot - * represent a UTF-8 character. */ + int fallback; /* Character (in this encoding) to substitute + * when this encoding cannot represent a UTF-8 + * character. */ unsigned int initLen; /* Length of following string. */ char init[16]; /* String to emit or expect before first char * in conversion. */ unsigned int finalLen; /* Length of following string. */ - char final[16]; /* String to emit or expect after last char - * in conversion. */ - char prefixBytes[256]; /* If a byte in the input stream is the - * first character of one of the escape - * sequences in the following array, the - * corresponding entry in this array is 1, - * otherwise it is 0. */ + char final[16]; /* String to emit or expect after last char in + * conversion. */ + char prefixBytes[256]; /* If a byte in the input stream is the first + * character of one of the escape sequences in + * the following array, the corresponding + * entry in this array is 1, otherwise it is + * 0. */ int numSubTables; /* Length of following array. */ - EscapeSubTable subTables[1];/* Information about each EscapeSubTable - * used by this encoding type. The actual - * size will be as large as necessary to - * hold all EscapeSubTables. */ + EscapeSubTable subTables[1];/* Information about each EscapeSubTable used + * by this encoding type. The actual size will + * be as large as necessary to hold all + * EscapeSubTables. */ } EscapeEncodingData; /* - * Constants used when loading an encoding file to identify the type of the + * constants used when loading an encoding file to identify the type of the * file. */ @@ -134,27 +133,51 @@ typedef struct EscapeEncodingData { #define ENCODING_ESCAPE 3 /* - * Initialize the default encoding directory. If this variable contains - * a non NULL value, it will be the first path used to locate the - * system encoding files. + * A list of directories in which Tcl should look for *.enc files. This list + * is shared by all threads. Access is governed by a mutex lock. */ -char *tclDefaultEncodingDir = NULL; +static TclInitProcessGlobalValueProc InitializeEncodingSearchPath; +static ProcessGlobalValue encodingSearchPath = { + 0, 0, NULL, NULL, InitializeEncodingSearchPath, NULL, NULL +}; -static int encodingsInitialized = 0; +/* + * A map from encoding names to the directories in which their data files have + * been seen. The string value of the map is shared by all threads. Access to + * the shared string is governed by a mutex lock. + */ + +static ProcessGlobalValue encodingFileMap = { + 0, 0, NULL, NULL, NULL, NULL, NULL +}; /* - * Hash table that keeps track of all loaded Encodings. Keys are - * the string names that represent the encoding, values are (Encoding *). + * A list of directories making up the "library path". Historically this + * search path has served many uses, but the only one remaining is a base for + * the encodingSearchPath above. If the application does not explicitly set + * the encodingSearchPath, then it will be initialized by appending /encoding + * to each directory in this "libraryPath". */ - + +static ProcessGlobalValue libraryPath = { + 0, 0, NULL, NULL, TclpInitLibraryPath, NULL, NULL +}; + +static int encodingsInitialized = 0; + +/* + * Hash table that keeps track of all loaded Encodings. Keys are the string + * names that represent the encoding, values are (Encoding *). + */ + static Tcl_HashTable encodingTable; TCL_DECLARE_MUTEX(encodingMutex) /* - * The following are used to hold the default and current system encodings. - * If NULL is passed to one of the conversion routines, the current setting - * of the system encoding will be used to perform the conversion. + * The following are used to hold the default and current system encodings. + * If NULL is passed to one of the conversion routines, the current setting of + * the system encoding will be used to perform the conversion. */ static Tcl_Encoding defaultEncoding; @@ -168,125 +191,129 @@ static Tcl_Encoding systemEncoding; static unsigned short emptyPage[256]; /* - * Procedures used only in this module. + * Functions used only in this module. */ -static int BinaryProc _ANSI_ARGS_((ClientData clientData, - CONST char *src, int srcLen, int flags, +static int BinaryProc(ClientData clientData, + const char *src, int srcLen, int flags, Tcl_EncodingState *statePtr, char *dst, int dstLen, int *srcReadPtr, int *dstWrotePtr, - int *dstCharsPtr)); -static void DupEncodingIntRep _ANSI_ARGS_((Tcl_Obj *srcPtr, - Tcl_Obj *dupPtr)); -static void EscapeFreeProc _ANSI_ARGS_((ClientData clientData)); -static int EscapeFromUtfProc _ANSI_ARGS_((ClientData clientData, - CONST char *src, int srcLen, int flags, + int *dstCharsPtr); +static void DupEncodingIntRep(Tcl_Obj *srcPtr, Tcl_Obj *dupPtr); +static void EscapeFreeProc(ClientData clientData); +static int EscapeFromUtfProc(ClientData clientData, + const char *src, int srcLen, int flags, Tcl_EncodingState *statePtr, char *dst, int dstLen, int *srcReadPtr, int *dstWrotePtr, - int *dstCharsPtr)); -static int EscapeToUtfProc _ANSI_ARGS_((ClientData clientData, - CONST char *src, int srcLen, int flags, + int *dstCharsPtr); +static int EscapeToUtfProc(ClientData clientData, + const char *src, int srcLen, int flags, Tcl_EncodingState *statePtr, char *dst, int dstLen, int *srcReadPtr, int *dstWrotePtr, - int *dstCharsPtr)); -static void FreeEncoding _ANSI_ARGS_((Tcl_Encoding encoding)); -static void FreeEncodingIntRep _ANSI_ARGS_((Tcl_Obj *objPtr)); -static Encoding * GetTableEncoding _ANSI_ARGS_(( - EscapeEncodingData *dataPtr, int state)); -static Tcl_Encoding LoadEncodingFile _ANSI_ARGS_((Tcl_Interp *interp, - CONST char *name)); -static Tcl_Encoding LoadTableEncoding _ANSI_ARGS_((Tcl_Interp *interp, - CONST char *name, int type, Tcl_Channel chan)); -static Tcl_Encoding LoadEscapeEncoding _ANSI_ARGS_((CONST char *name, - Tcl_Channel chan)); -static Tcl_Channel OpenEncodingFile _ANSI_ARGS_((CONST char *dir, - CONST char *name)); -static void TableFreeProc _ANSI_ARGS_((ClientData clientData)); -static int TableFromUtfProc _ANSI_ARGS_((ClientData clientData, - CONST char *src, int srcLen, int flags, + int *dstCharsPtr); +static void FillEncodingFileMap(void); +static void FreeEncoding(Tcl_Encoding encoding); +static void FreeEncodingIntRep(Tcl_Obj *objPtr); +static Encoding * GetTableEncoding(EscapeEncodingData *dataPtr, + int state); +static Tcl_Encoding LoadEncodingFile(Tcl_Interp *interp, const char *name); +static Tcl_Encoding LoadTableEncoding(const char *name, int type, + Tcl_Channel chan); +static Tcl_Encoding LoadEscapeEncoding(const char *name, Tcl_Channel chan); +static Tcl_Channel OpenEncodingFileChannel(Tcl_Interp *interp, + const char *name); +static void TableFreeProc(ClientData clientData); +static int TableFromUtfProc(ClientData clientData, + const char *src, int srcLen, int flags, Tcl_EncodingState *statePtr, char *dst, int dstLen, int *srcReadPtr, int *dstWrotePtr, - int *dstCharsPtr)); -static int TableToUtfProc _ANSI_ARGS_((ClientData clientData, - CONST char *src, int srcLen, int flags, + int *dstCharsPtr); +static int TableToUtfProc(ClientData clientData, const char *src, + int srcLen, int flags, Tcl_EncodingState *statePtr, + char *dst, int dstLen, int *srcReadPtr, + int *dstWrotePtr, int *dstCharsPtr); +static size_t unilen(const char *src); +static int UnicodeToUtfProc(ClientData clientData, + const char *src, int srcLen, int flags, Tcl_EncodingState *statePtr, char *dst, int dstLen, int *srcReadPtr, int *dstWrotePtr, - int *dstCharsPtr)); -static size_t unilen _ANSI_ARGS_((CONST char *src)); -static int UnicodeToUtfProc _ANSI_ARGS_((ClientData clientData, - CONST char *src, int srcLen, int flags, + int *dstCharsPtr); +static int UtfToUnicodeProc(ClientData clientData, + const char *src, int srcLen, int flags, Tcl_EncodingState *statePtr, char *dst, int dstLen, int *srcReadPtr, int *dstWrotePtr, - int *dstCharsPtr)); -static int UtfToUnicodeProc _ANSI_ARGS_((ClientData clientData, - CONST char *src, int srcLen, int flags, + int *dstCharsPtr); +static int UtfToUtfProc(ClientData clientData, + const char *src, int srcLen, int flags, Tcl_EncodingState *statePtr, char *dst, int dstLen, int *srcReadPtr, int *dstWrotePtr, - int *dstCharsPtr)); -static int UtfToUtfProc _ANSI_ARGS_((ClientData clientData, - CONST char *src, int srcLen, int flags, + int *dstCharsPtr, int pureNullMode); +static int UtfIntToUtfExtProc(ClientData clientData, + const char *src, int srcLen, int flags, Tcl_EncodingState *statePtr, char *dst, int dstLen, int *srcReadPtr, int *dstWrotePtr, - int *dstCharsPtr, int pureNullMode)); -static int UtfIntToUtfExtProc _ANSI_ARGS_((ClientData clientData, - CONST char *src, int srcLen, int flags, + int *dstCharsPtr); +static int UtfExtToUtfIntProc(ClientData clientData, + const char *src, int srcLen, int flags, Tcl_EncodingState *statePtr, char *dst, int dstLen, int *srcReadPtr, int *dstWrotePtr, - int *dstCharsPtr)); -static int UtfExtToUtfIntProc _ANSI_ARGS_((ClientData clientData, - CONST char *src, int srcLen, int flags, + int *dstCharsPtr); +static int Iso88591FromUtfProc(ClientData clientData, + const char *src, int srcLen, int flags, Tcl_EncodingState *statePtr, char *dst, int dstLen, int *srcReadPtr, int *dstWrotePtr, - int *dstCharsPtr)); -static int TclFindEncodings _ANSI_ARGS_((CONST char *argv0)); + int *dstCharsPtr); +static int Iso88591ToUtfProc(ClientData clientData, + const char *src, int srcLen, int flags, + Tcl_EncodingState *statePtr, char *dst, + int dstLen, int *srcReadPtr, int *dstWrotePtr, + int *dstCharsPtr); /* - * A Tcl_ObjType for holding a cached Tcl_Encoding as the intrep. - * This should help the lifetime of encodings be more useful. + * A Tcl_ObjType for holding a cached Tcl_Encoding in the otherValuePtr field + * of the intrep. This should help the lifetime of encodings be more useful. * See concerns raised in [Bug 1077262]. */ -static Tcl_ObjType EncodingType = { +static Tcl_ObjType encodingType = { "encoding", FreeEncodingIntRep, DupEncodingIntRep, NULL, NULL }; - /* *---------------------------------------------------------------------- * - * TclGetEncodingFromObj -- + * Tcl_GetEncodingFromObj -- * - * Writes to (*encodingPtr) the Tcl_Encoding value of (*objPtr), - * if possible, and returns TCL_OK. If no such encoding exists, - * TCL_ERROR is returned, and if interp is non-NULL, an error message - * is written there. + * Writes to (*encodingPtr) the Tcl_Encoding value of (*objPtr), if + * possible, and returns TCL_OK. If no such encoding exists, TCL_ERROR is + * returned, and if interp is non-NULL, an error message is written + * there. * * Results: - * Standard Tcl return code. + * Standard Tcl return code. * * Side effects: * Caches the Tcl_Encoding value as the internal rep of (*objPtr). * *---------------------------------------------------------------------- */ -int -TclGetEncodingFromObj(interp, objPtr, encodingPtr) - Tcl_Interp *interp; - Tcl_Obj *objPtr; - Tcl_Encoding *encodingPtr; + +int +Tcl_GetEncodingFromObj( + Tcl_Interp *interp, + Tcl_Obj *objPtr, + Tcl_Encoding *encodingPtr) { - CONST char *name = Tcl_GetString(objPtr); - if (objPtr->typePtr != &EncodingType) { + const char *name = Tcl_GetString(objPtr); + if (objPtr->typePtr != &encodingType) { Tcl_Encoding encoding = Tcl_GetEncoding(interp, name); if (encoding == NULL) { return TCL_ERROR; } - if (objPtr->typePtr && objPtr->typePtr->freeIntRepProc) { - objPtr->typePtr->freeIntRepProc(objPtr); - } + TclFreeIntRep(objPtr); objPtr->internalRep.otherValuePtr = (VOID *) encoding; - objPtr->typePtr = &EncodingType; + objPtr->typePtr = &encodingType; } *encodingPtr = Tcl_GetEncoding(NULL, name); return TCL_OK; @@ -297,15 +324,17 @@ TclGetEncodingFromObj(interp, objPtr, encodingPtr) * * FreeEncodingIntRep -- * - * The Tcl_FreeInternalRepProc for the "encoding" Tcl_ObjType. + * The Tcl_FreeInternalRepProc for the "encoding" Tcl_ObjType. * *---------------------------------------------------------------------- */ + static void -FreeEncodingIntRep(objPtr) - Tcl_Obj *objPtr; +FreeEncodingIntRep( + Tcl_Obj *objPtr) { Tcl_FreeEncoding((Tcl_Encoding) objPtr->internalRep.otherValuePtr); + objPtr->typePtr = NULL; } /* @@ -313,26 +342,193 @@ FreeEncodingIntRep(objPtr) * * DupEncodingIntRep -- * - * The Tcl_DupInternalRepProc for the "encoding" Tcl_ObjType. + * The Tcl_DupInternalRepProc for the "encoding" Tcl_ObjType. * *---------------------------------------------------------------------- */ + static void -DupEncodingIntRep(srcPtr, dupPtr) - Tcl_Obj *srcPtr; - Tcl_Obj *dupPtr; +DupEncodingIntRep( + Tcl_Obj *srcPtr, + Tcl_Obj *dupPtr) { dupPtr->internalRep.otherValuePtr = (VOID *) Tcl_GetEncoding(NULL, srcPtr->bytes); } /* + *---------------------------------------------------------------------- + * + * Tcl_GetEncodingSearchPath -- + * + * Keeps the per-thread copy of the encoding search path current with + * changes to the global copy. + * + * Results: + * Returns a "list" (Tcl_Obj *) that contains the encoding search path. + * + *---------------------------------------------------------------------- + */ + +Tcl_Obj * +Tcl_GetEncodingSearchPath(void) +{ + return TclGetProcessGlobalValue(&encodingSearchPath); +} + +/* + *---------------------------------------------------------------------- + * + * Tcl_SetEncodingSearchPath -- + * + * Keeps the per-thread copy of the encoding search path current with + * changes to the global copy. + * + *---------------------------------------------------------------------- + */ + +int +Tcl_SetEncodingSearchPath( + Tcl_Obj *searchPath) +{ + int dummy; + + if (TCL_ERROR == Tcl_ListObjLength(NULL, searchPath, &dummy)) { + return TCL_ERROR; + } + TclSetProcessGlobalValue(&encodingSearchPath, searchPath, NULL); + return TCL_OK; +} + +/* + *---------------------------------------------------------------------- + * + * TclGetLibraryPath -- + * + * Keeps the per-thread copy of the library path current with changes to + * the global copy. + * + * Results: + * Returns a "list" (Tcl_Obj *) that contains the library path. + * + *---------------------------------------------------------------------- + */ + +Tcl_Obj * +TclGetLibraryPath(void) +{ + return TclGetProcessGlobalValue(&libraryPath); +} + +/* + *---------------------------------------------------------------------- + * + * TclSetLibraryPath -- + * + * Keeps the per-thread copy of the library path current with changes to + * the global copy. + * + * NOTE: this routine returns void, so there's no way to report the error + * that searchPath is not a valid list. In that case, this routine will + * silently do nothing. + * + *---------------------------------------------------------------------- + */ + +void +TclSetLibraryPath( + Tcl_Obj *path) +{ + int dummy; + + if (TCL_ERROR == Tcl_ListObjLength(NULL, path, &dummy)) { + return; + } + TclSetProcessGlobalValue(&libraryPath, path, NULL); +} + +/* + *--------------------------------------------------------------------------- + * + * FillEncodingFileMap -- + * + * Called to bring the encoding file map in sync with the current value + * of the encoding search path. + * + * Scan the directories on the encoding search path, find the *.enc + * files, and store the found pathnames in a map associated with the + * encoding name. + * + * In particular, if $dir is on the encoding search path, and the file + * $dir/foo.enc is found, then store a "foo" -> $dir entry in the map. + * Later, any need for the "foo" encoding will quickly * be able to + * construct the $dir/foo.enc pathname for reading the encoding data. + * + * Results: + * None. + * + * Side effects: + * Entries are added to the encoding file map. + * + *--------------------------------------------------------------------------- + */ + +static void +FillEncodingFileMap(void) +{ + int i, numDirs = 0; + Tcl_Obj *map, *searchPath; + + searchPath = Tcl_GetEncodingSearchPath(); + Tcl_IncrRefCount(searchPath); + Tcl_ListObjLength(NULL, searchPath, &numDirs); + map = Tcl_NewDictObj(); + Tcl_IncrRefCount(map); + + for (i = numDirs-1; i >= 0; i--) { + /* + * Iterate backwards through the search path so as we overwrite + * entries found, we favor files earlier on the search path. + */ + + int j, numFiles; + Tcl_Obj *directory, *matchFileList = Tcl_NewObj(); + Tcl_Obj **filev; + Tcl_GlobTypeData readableFiles = { + TCL_GLOB_TYPE_FILE, TCL_GLOB_PERM_R, NULL, NULL + }; + + Tcl_ListObjIndex(NULL, searchPath, i, &directory); + Tcl_IncrRefCount(directory); + Tcl_IncrRefCount(matchFileList); + Tcl_FSMatchInDirectory(NULL, matchFileList, directory, "*.enc", + &readableFiles); + + Tcl_ListObjGetElements(NULL, matchFileList, &numFiles, &filev); + for (j=0; j<numFiles; j++) { + Tcl_Obj *encodingName, *file; + + file = TclPathPart(NULL, filev[j], TCL_PATH_TAIL); + encodingName = TclPathPart(NULL, file, TCL_PATH_ROOT); + Tcl_DictObjPut(NULL, map, encodingName, directory); + Tcl_DecrRefCount(file); + Tcl_DecrRefCount(encodingName); + } + Tcl_DecrRefCount(matchFileList); + Tcl_DecrRefCount(directory); + } + Tcl_DecrRefCount(searchPath); + TclSetProcessGlobalValue(&encodingFileMap, map, NULL); + Tcl_DecrRefCount(map); +} + +/* *--------------------------------------------------------------------------- * * TclInitEncodingSubsystem -- * * Initialize all resources used by this subsystem on a per-process - * basis. + * basis. * * Results: * None. @@ -344,18 +540,22 @@ DupEncodingIntRep(srcPtr, dupPtr) */ void -TclInitEncodingSubsystem() +TclInitEncodingSubsystem(void) { Tcl_EncodingType type; + if (encodingsInitialized) { + return; + } + Tcl_MutexLock(&encodingMutex); Tcl_InitHashTable(&encodingTable, TCL_STRING_KEYS); Tcl_MutexUnlock(&encodingMutex); - + /* - * Create a few initial encodings. Note that the UTF-8 to UTF-8 - * translation is not a no-op, because it will turn a stream of - * improperly formed UTF-8 into a properly formed stream. + * Create a few initial encodings. Note that the UTF-8 to UTF-8 + * translation is not a no-op, because it will turn a stream of improperly + * formed UTF-8 into a properly formed stream. */ type.encodingName = "identity"; @@ -383,8 +583,53 @@ TclInitEncodingSubsystem() type.nullSize = 2; type.clientData = NULL; Tcl_CreateEncoding(&type); -} + /* + * Need the iso8859-1 encoding in order to process binary data, so force + * it to always be embedded. Note that this encoding *must* be a proper + * table encoding or some of the escape encodings crash! Hence the ugly + * code to duplicate the structure of a table encoding here. + */ + + { + TableEncodingData *dataPtr = (TableEncodingData *) + ckalloc(sizeof(TableEncodingData)); + unsigned size; + unsigned short i; + + memset(dataPtr, 0, sizeof(TableEncodingData)); + dataPtr->fallback = '?'; + + size = 256*(sizeof(unsigned short *) + sizeof(unsigned short)); + dataPtr->toUnicode = (unsigned short **) ckalloc(size); + memset(dataPtr->toUnicode, 0, size); + dataPtr->fromUnicode = (unsigned short **) ckalloc(size); + memset(dataPtr->fromUnicode, 0, size); + + dataPtr->toUnicode[0] = (unsigned short *) (dataPtr->toUnicode + 256); + dataPtr->fromUnicode[0] = (unsigned short *) + (dataPtr->fromUnicode + 256); + for (i=1 ; i<256 ; i++) { + dataPtr->toUnicode[i] = emptyPage; + dataPtr->fromUnicode[i] = emptyPage; + } + + for (i=0 ; i<256 ; i++) { + dataPtr->toUnicode[0][i] = i; + dataPtr->fromUnicode[0][i] = i; + } + + type.encodingName = "iso8859-1"; + type.toUtfProc = Iso88591ToUtfProc; + type.fromUtfProc = Iso88591FromUtfProc; + type.freeProc = TableFreeProc; + type.nullSize = 1; + type.clientData = dataPtr; + Tcl_CreateEncoding(&type); + } + + encodingsInitialized = 1; +} /* *---------------------------------------------------------------------- @@ -403,25 +648,28 @@ TclInitEncodingSubsystem() */ void -TclFinalizeEncodingSubsystem() +TclFinalizeEncodingSubsystem(void) { Tcl_HashSearch search; Tcl_HashEntry *hPtr; Tcl_MutexLock(&encodingMutex); - encodingsInitialized = 0; + encodingsInitialized = 0; FreeEncoding(systemEncoding); + hPtr = Tcl_FirstHashEntry(&encodingTable, &search); while (hPtr != NULL) { /* * Call FreeEncoding instead of doing it directly to handle refcounts - * like escape encodings use. [Bug #524674] - * Make sure to call Tcl_FirstHashEntry repeatedly so that all - * encodings are eventually cleaned up. + * like escape encodings use. [Bug 524674] Make sure to call + * Tcl_FirstHashEntry repeatedly so that all encodings are eventually + * cleaned up. */ + FreeEncoding((Tcl_Encoding) Tcl_GetHashValue(hPtr)); hPtr = Tcl_FirstHashEntry(&encodingTable, &search); } + Tcl_DeleteHashTable(&encodingTable); Tcl_MutexUnlock(&encodingMutex); } @@ -431,18 +679,32 @@ TclFinalizeEncodingSubsystem() * * Tcl_GetDefaultEncodingDir -- * + * Legacy public interface to retrieve first directory in the encoding + * searchPath. * * Results: + * The directory pathname, as a string, or NULL for an empty encoding + * search path. * * Side effects: + * None. * *------------------------------------------------------------------------- */ -CONST char * -Tcl_GetDefaultEncodingDir() +const char * +Tcl_GetDefaultEncodingDir(void) { - return tclDefaultEncodingDir; + int numDirs; + Tcl_Obj *first, *searchPath = Tcl_GetEncodingSearchPath(); + + Tcl_ListObjLength(NULL, searchPath, &numDirs); + if (numDirs == 0) { + return NULL; + } + Tcl_ListObjIndex(NULL, searchPath, 0, &first); + + return Tcl_GetString(first); } /* @@ -450,20 +712,28 @@ Tcl_GetDefaultEncodingDir() * * Tcl_SetDefaultEncodingDir -- * + * Legacy public interface to set the first directory in the encoding + * search path. * * Results: + * None. * * Side effects: + * Modifies the encoding search path. * *------------------------------------------------------------------------- */ void -Tcl_SetDefaultEncodingDir(path) - CONST char *path; +Tcl_SetDefaultEncodingDir( + const char *path) { - tclDefaultEncodingDir = (char *)ckalloc((unsigned) strlen(path) + 1); - strcpy(tclDefaultEncodingDir, path); + Tcl_Obj *searchPath = Tcl_GetEncodingSearchPath(); + Tcl_Obj *directory = Tcl_NewStringObj(path, -1); + + searchPath = Tcl_DuplicateObj(searchPath); + Tcl_ListObjReplace(NULL, searchPath, 0, 0, 1, &directory); + Tcl_SetEncodingSearchPath(searchPath); } /* @@ -472,29 +742,29 @@ Tcl_SetDefaultEncodingDir(path) * Tcl_GetEncoding -- * * Given the name of a encoding, find the corresponding Tcl_Encoding - * token. If the encoding did not already exist, Tcl attempts to + * token. If the encoding did not already exist, Tcl attempts to * dynamically load an encoding by that name. * * Results: - * Returns a token that represents the encoding. If the name didn't - * refer to any known or loadable encoding, NULL is returned. If - * NULL was returned, an error message is left in interp's result - * object, unless interp was NULL. + * Returns a token that represents the encoding. If the name didn't refer + * to any known or loadable encoding, NULL is returned. If NULL was + * returned, an error message is left in interp's result object, unless + * interp was NULL. * * Side effects: * The new encoding type is entered into a table visible to all - * interpreters, keyed off the encoding's name. For each call to - * this procedure, there should eventually be a call to - * Tcl_FreeEncoding, so that the database can be cleaned up when - * encodings aren't needed anymore. + * interpreters, keyed off the encoding's name. For each call to this + * function, there should eventually be a call to Tcl_FreeEncoding, so + * that the database can be cleaned up when encodings aren't needed + * anymore. * *------------------------------------------------------------------------- */ Tcl_Encoding -Tcl_GetEncoding(interp, name) - Tcl_Interp *interp; /* Interp for error reporting, if not NULL. */ - CONST char *name; /* The name of the desired encoding. */ +Tcl_GetEncoding( + Tcl_Interp *interp, /* Interp for error reporting, if not NULL. */ + const char *name) /* The name of the desired encoding. */ { Tcl_HashEntry *hPtr; Encoding *encodingPtr; @@ -515,6 +785,7 @@ Tcl_GetEncoding(interp, name) return (Tcl_Encoding) encodingPtr; } Tcl_MutexUnlock(&encodingMutex); + return LoadEncodingFile(interp, name); } @@ -523,22 +794,22 @@ Tcl_GetEncoding(interp, name) * * Tcl_FreeEncoding -- * - * This procedure is called to release an encoding allocated by + * This function is called to release an encoding allocated by * Tcl_CreateEncoding() or Tcl_GetEncoding(). * * Results: * None. * * Side effects: - * The reference count associated with the encoding is decremented - * and the encoding may be deleted if nothing is using it anymore. + * The reference count associated with the encoding is decremented and + * the encoding may be deleted if nothing is using it anymore. * *--------------------------------------------------------------------------- */ void -Tcl_FreeEncoding(encoding) - Tcl_Encoding encoding; +Tcl_FreeEncoding( + Tcl_Encoding encoding) { Tcl_MutexLock(&encodingMutex); FreeEncoding(encoding); @@ -550,25 +821,25 @@ Tcl_FreeEncoding(encoding) * * FreeEncoding -- * - * This procedure is called to release an encoding by procedures - * that already have the encodingMutex. + * This function is called to release an encoding by functions that + * already have the encodingMutex. * * Results: * None. * * Side effects: - * The reference count associated with the encoding is decremented - * and the encoding may be deleted if nothing is using it anymore. + * The reference count associated with the encoding is decremented and + * the encoding may be deleted if nothing is using it anymore. * *---------------------------------------------------------------------- */ static void -FreeEncoding(encoding) - Tcl_Encoding encoding; +FreeEncoding( + Tcl_Encoding encoding) { Encoding *encodingPtr; - + encodingPtr = (Encoding *) encoding; if (encodingPtr == NULL) { return; @@ -594,8 +865,8 @@ FreeEncoding(encoding) * * Tcl_GetEncodingName -- * - * Given an encoding, return the name that was used to constuct - * the encoding. + * Given an encoding, return the name that was used to constuct the + * encoding. * * Results: * The name of the encoding. @@ -606,17 +877,15 @@ FreeEncoding(encoding) *--------------------------------------------------------------------------- */ -CONST char * -Tcl_GetEncodingName(encoding) - Tcl_Encoding encoding; /* The encoding whose name to fetch. */ +const char * +Tcl_GetEncodingName( + Tcl_Encoding encoding) /* The encoding whose name to fetch. */ { - Encoding *encodingPtr; - if (encoding == NULL) { encoding = systemEncoding; } - encodingPtr = (Encoding *) encoding; - return encodingPtr->name; + + return ((Encoding *) encoding)->name; } /* @@ -624,8 +893,8 @@ Tcl_GetEncodingName(encoding) * * Tcl_GetEncodingNames -- * - * Get the list of all known encodings, including the ones stored - * as files on disk in the encoding path. + * Get the list of all known encodings, including the ones stored as + * files on disk in the encoding path. * * Results: * Modifies interp's result object to hold a list of all the available @@ -638,102 +907,53 @@ Tcl_GetEncodingName(encoding) */ void -Tcl_GetEncodingNames(interp) - Tcl_Interp *interp; /* Interp to hold result. */ +Tcl_GetEncodingNames( + Tcl_Interp *interp) /* Interp to hold result. */ { + Tcl_HashTable table; Tcl_HashSearch search; Tcl_HashEntry *hPtr; - Tcl_Obj *pathPtr, *resultPtr; - int dummy; + Tcl_Obj *map, *name, *result = Tcl_NewObj(); + Tcl_DictSearch mapSearch; + int dummy, done = 0; - Tcl_HashTable table; + Tcl_InitObjHashTable(&table); + + /* + * Copy encoding names from loaded encoding table to table. + */ Tcl_MutexLock(&encodingMutex); - Tcl_InitHashTable(&table, TCL_STRING_KEYS); - hPtr = Tcl_FirstHashEntry(&encodingTable, &search); - while (hPtr != NULL) { - Encoding *encodingPtr; - - encodingPtr = (Encoding *) Tcl_GetHashValue(hPtr); - Tcl_CreateHashEntry(&table, encodingPtr->name, &dummy); - hPtr = Tcl_NextHashEntry(&search); + for (hPtr = Tcl_FirstHashEntry(&encodingTable, &search); hPtr != NULL; + hPtr = Tcl_NextHashEntry(&search)) { + Encoding *encodingPtr = (Encoding *) Tcl_GetHashValue(hPtr); + Tcl_CreateHashEntry(&table, + (char *) Tcl_NewStringObj(encodingPtr->name, -1), &dummy); } Tcl_MutexUnlock(&encodingMutex); - pathPtr = TclGetLibraryPath(); - if (pathPtr != NULL) { - int i, objc; - Tcl_Obj **objv; - char globArgString[10]; - Tcl_Obj* encodingObj = Tcl_NewStringObj("encoding",-1); - Tcl_IncrRefCount(encodingObj); - - objc = 0; - Tcl_ListObjGetElements(NULL, pathPtr, &objc, &objv); - - for (i = 0; i < objc; i++) { - Tcl_Obj *searchIn; - - /* - * Construct the path from the element of pathPtr, - * joined with 'encoding'. - */ - searchIn = Tcl_FSJoinToPath(objv[i],1,&encodingObj); - Tcl_IncrRefCount(searchIn); - Tcl_ResetResult(interp); + FillEncodingFileMap(); + map = TclGetProcessGlobalValue(&encodingFileMap); - /* - * TclGlob() changes the contents of globArgString, which causes - * a segfault if we pass in a pointer to non-writeable memory. - * TclGlob() puts its results directly into interp. - */ + /* + * Copy encoding names from encoding file map to table. + */ - strcpy(globArgString, "*.enc"); - /* - * The GLOBMODE_TAILS flag returns just the tail of each file - * which is the encoding name with a .enc extension - */ - if ((TclGlob(interp, globArgString, searchIn, - TCL_GLOBMODE_TAILS, NULL) == TCL_OK)) { - int objc2 = 0; - Tcl_Obj **objv2; - int j; - - Tcl_ListObjGetElements(NULL, Tcl_GetObjResult(interp), &objc2, - &objv2); - - for (j = 0; j < objc2; j++) { - int length; - char *string; - string = Tcl_GetStringFromObj(objv2[j], &length); - length -= 4; - if (length > 0) { - string[length] = '\0'; - Tcl_CreateHashEntry(&table, string, &dummy); - string[length] = '.'; - } - } - } - Tcl_DecrRefCount(searchIn); - } - Tcl_DecrRefCount(encodingObj); + Tcl_DictObjFirst(NULL, map, &mapSearch, &name, NULL, &done); + for (; !done; Tcl_DictObjNext(&mapSearch, &name, NULL, &done)) { + Tcl_CreateHashEntry(&table, (char *) name, &dummy); } /* - * Clear any values placed in the result by globbing. + * Pull all encoding names from table into the result list. */ - Tcl_ResetResult(interp); - resultPtr = Tcl_GetObjResult(interp); - - hPtr = Tcl_FirstHashEntry(&table, &search); - while (hPtr != NULL) { - Tcl_Obj *strPtr; - - strPtr = Tcl_NewStringObj(Tcl_GetHashKey(&table, hPtr), -1); - Tcl_ListObjAppendElement(NULL, resultPtr, strPtr); - hPtr = Tcl_NextHashEntry(&search); + for (hPtr = Tcl_FirstHashEntry(&table, &search); hPtr != NULL; + hPtr = Tcl_NextHashEntry(&search)) { + Tcl_ListObjAppendElement(NULL, result, + (Tcl_Obj *) Tcl_GetHashKey(&table, hPtr)); } + Tcl_SetObjResult(interp, result); Tcl_DeleteHashTable(&table); } @@ -742,29 +962,29 @@ Tcl_GetEncodingNames(interp) * * Tcl_SetSystemEncoding -- * - * Sets the default encoding that should be used whenever the user - * passes a NULL value in to one of the conversion routines. - * If the supplied name is NULL, the system encoding is reset to the - * default system encoding. + * Sets the default encoding that should be used whenever the user passes + * a NULL value in to one of the conversion routines. If the supplied + * name is NULL, the system encoding is reset to the default system + * encoding. * * Results: - * The return value is TCL_OK if the system encoding was successfully - * set to the encoding specified by name, TCL_ERROR otherwise. If - * TCL_ERROR is returned, an error message is left in interp's result - * object, unless interp was NULL. + * The return value is TCL_OK if the system encoding was successfully set + * to the encoding specified by name, TCL_ERROR otherwise. If TCL_ERROR + * is returned, an error message is left in interp's result object, + * unless interp was NULL. * * Side effects: - * The reference count of the new system encoding is incremented. - * The reference count of the old system encoding is decremented and - * it may be freed. + * The reference count of the new system encoding is incremented. The + * reference count of the old system encoding is decremented and it may + * be freed. * *------------------------------------------------------------------------ */ int -Tcl_SetSystemEncoding(interp, name) - Tcl_Interp *interp; /* Interp for error reporting, if not NULL. */ - CONST char *name; /* The name of the desired encoding, or NULL/"" +Tcl_SetSystemEncoding( + Tcl_Interp *interp, /* Interp for error reporting, if not NULL. */ + const char *name) /* The name of the desired encoding, or NULL/"" * to reset to default encoding. */ { Tcl_Encoding encoding; @@ -796,50 +1016,51 @@ Tcl_SetSystemEncoding(interp, name) * * Tcl_CreateEncoding -- * - * This procedure is called to define a new encoding and the procedures - * that are used to convert between the specified encoding and Unicode. + * This function is called to define a new encoding and the functions + * that are used to convert between the specified encoding and Unicode. * * Results: - * Returns a token that represents the encoding. If an encoding with - * the same name already existed, the old encoding token remains - * valid and continues to behave as it used to, and will eventually - * be garbage collected when the last reference to it goes away. Any - * subsequent calls to Tcl_GetEncoding with the specified name will - * retrieve the most recent encoding token. + * Returns a token that represents the encoding. If an encoding with the + * same name already existed, the old encoding token remains valid and + * continues to behave as it used to, and will eventually be garbage + * collected when the last reference to it goes away. Any subsequent + * calls to Tcl_GetEncoding with the specified name will retrieve the + * most recent encoding token. * * Side effects: * The new encoding type is entered into a table visible to all - * interpreters, keyed off the encoding's name. For each call to - * this procedure, there should eventually be a call to - * Tcl_FreeEncoding, so that the database can be cleaned up when - * encodings aren't needed anymore. + * interpreters, keyed off the encoding's name. For each call to this + * function, there should eventually be a call to Tcl_FreeEncoding, so + * that the database can be cleaned up when encodings aren't needed + * anymore. * *--------------------------------------------------------------------------- - */ + */ Tcl_Encoding -Tcl_CreateEncoding(typePtr) - Tcl_EncodingType *typePtr; /* The encoding type. */ +Tcl_CreateEncoding( + const Tcl_EncodingType *typePtr) + /* The encoding type. */ { Tcl_HashEntry *hPtr; - int new; + int isNew; Encoding *encodingPtr; char *name; Tcl_MutexLock(&encodingMutex); - hPtr = Tcl_CreateHashEntry(&encodingTable, typePtr->encodingName, &new); - if (new == 0) { + hPtr = Tcl_CreateHashEntry(&encodingTable, typePtr->encodingName, &isNew); + if (isNew == 0) { /* - * Remove old encoding from hash table, but don't delete it until - * last reference goes away. + * Remove old encoding from hash table, but don't delete it until last + * reference goes away. */ - + encodingPtr = (Encoding *) Tcl_GetHashValue(hPtr); encodingPtr->hPtr = NULL; } name = ckalloc((unsigned) strlen(typePtr->encodingName) + 1); - + encodingPtr = (Encoding *) ckalloc(sizeof(Encoding)); encodingPtr->name = strcpy(name, typePtr->encodingName); encodingPtr->toUtfProc = typePtr->toUtfProc; @@ -866,15 +1087,15 @@ Tcl_CreateEncoding(typePtr) * * Tcl_ExternalToUtfDString -- * - * Convert a source buffer from the specified encoding into UTF-8. - * If any of the bytes in the source buffer are invalid or cannot - * be represented in the target encoding, a default fallback - * character will be substituted. + * Convert a source buffer from the specified encoding into UTF-8. If any + * of the bytes in the source buffer are invalid or cannot be represented + * in the target encoding, a default fallback character will be + * substituted. * * Results: * The converted bytes are stored in the DString, which is then NULL - * terminated. The return value is a pointer to the value stored - * in the DString. + * terminated. The return value is a pointer to the value stored in the + * DString. * * Side effects: * None. @@ -882,15 +1103,15 @@ Tcl_CreateEncoding(typePtr) *------------------------------------------------------------------------- */ -char * -Tcl_ExternalToUtfDString(encoding, src, srcLen, dstPtr) - Tcl_Encoding encoding; /* The encoding for the source string, or - * NULL for the default system encoding. */ - CONST char *src; /* Source string in specified encoding. */ - int srcLen; /* Source string length in bytes, or < 0 for +char * +Tcl_ExternalToUtfDString( + Tcl_Encoding encoding, /* The encoding for the source string, or NULL + * for the default system encoding. */ + const char *src, /* Source string in specified encoding. */ + int srcLen, /* Source string length in bytes, or < 0 for * encoding-specific string length. */ - Tcl_DString *dstPtr; /* Uninitialized or free DString in which - * the converted string is stored. */ + Tcl_DString *dstPtr) /* Uninitialized or free DString in which the + * converted string is stored. */ { char *dst; Tcl_EncodingState state; @@ -900,7 +1121,7 @@ Tcl_ExternalToUtfDString(encoding, src, srcLen, dstPtr) Tcl_DStringInit(dstPtr); dst = Tcl_DStringValue(dstPtr); dstLen = dstPtr->spaceAvl - 1; - + if (encoding == NULL) { encoding = systemEncoding; } @@ -911,16 +1132,20 @@ Tcl_ExternalToUtfDString(encoding, src, srcLen, dstPtr) } else if (srcLen < 0) { srcLen = (*encodingPtr->lengthProc)(src); } + flags = TCL_ENCODING_START | TCL_ENCODING_END; + while (1) { result = (*encodingPtr->toUtfProc)(encodingPtr->clientData, src, srcLen, flags, &state, dst, dstLen, &srcRead, &dstWrote, &dstChars); soFar = dst + dstWrote - Tcl_DStringValue(dstPtr); + if (result != TCL_CONVERT_NOSPACE) { Tcl_DStringSetLength(dstPtr, soFar); return Tcl_DStringValue(dstPtr); } + flags &= ~TCL_ENCODING_START; src += srcRead; srcLen -= srcRead; @@ -942,50 +1167,49 @@ Tcl_ExternalToUtfDString(encoding, src, srcLen, dstPtr) * * Results: * The return value is one of TCL_OK, TCL_CONVERT_MULTIBYTE, - * TCL_CONVERT_SYNTAX, TCL_CONVERT_UNKNOWN, or TCL_CONVERT_NOSPACE, - * as documented in tcl.h. + * TCL_CONVERT_SYNTAX, TCL_CONVERT_UNKNOWN, or TCL_CONVERT_NOSPACE, as + * documented in tcl.h. * * Side effects: - * The converted bytes are stored in the output buffer. + * The converted bytes are stored in the output buffer. * *------------------------------------------------------------------------- */ int -Tcl_ExternalToUtf(interp, encoding, src, srcLen, flags, statePtr, dst, - dstLen, srcReadPtr, dstWrotePtr, dstCharsPtr) - Tcl_Interp *interp; /* Interp for error return, if not NULL. */ - Tcl_Encoding encoding; /* The encoding for the source string, or - * NULL for the default system encoding. */ - CONST char *src; /* Source string in specified encoding. */ - int srcLen; /* Source string length in bytes, or < 0 for +Tcl_ExternalToUtf( + Tcl_Interp *interp, /* Interp for error return, if not NULL. */ + Tcl_Encoding encoding, /* The encoding for the source string, or NULL + * for the default system encoding. */ + const char *src, /* Source string in specified encoding. */ + int srcLen, /* Source string length in bytes, or < 0 for * encoding-specific string length. */ - int flags; /* Conversion control flags. */ - Tcl_EncodingState *statePtr;/* Place for conversion routine to store - * state information used during a piecewise - * conversion. Contents of statePtr are + int flags, /* Conversion control flags. */ + Tcl_EncodingState *statePtr,/* Place for conversion routine to store state + * information used during a piecewise + * conversion. Contents of statePtr are * initialized and/or reset by conversion * routine under control of flags argument. */ - char *dst; /* Output buffer in which converted string - * is stored. */ - int dstLen; /* The maximum length of output buffer in + char *dst, /* Output buffer in which converted string is + * stored. */ + int dstLen, /* The maximum length of output buffer in * bytes. */ - int *srcReadPtr; /* Filled with the number of bytes from the - * source string that were converted. This - * may be less than the original source length - * if there was a problem converting some - * source characters. */ - int *dstWrotePtr; /* Filled with the number of bytes that were + int *srcReadPtr, /* Filled with the number of bytes from the + * source string that were converted. This may + * be less than the original source length if + * there was a problem converting some source + * characters. */ + int *dstWrotePtr, /* Filled with the number of bytes that were * stored in the output buffer as a result of * the conversion. */ - int *dstCharsPtr; /* Filled with the number of characters that + int *dstCharsPtr) /* Filled with the number of characters that * correspond to the bytes stored in the * output buffer. */ { Encoding *encodingPtr; int result, srcRead, dstWrote, dstChars; Tcl_EncodingState state; - + if (encoding == NULL) { encoding = systemEncoding; } @@ -1012,8 +1236,8 @@ Tcl_ExternalToUtf(interp, encoding, src, srcLen, flags, statePtr, dst, /* * If there are any null characters in the middle of the buffer, they will - * converted to the UTF-8 null character (\xC080). To get the actual - * \0 at the end of the destination buffer, we need to append it manually. + * converted to the UTF-8 null character (\xC080). To get the actual \0 at + * the end of the destination buffer, we need to append it manually. */ dstLen--; @@ -1021,6 +1245,7 @@ Tcl_ExternalToUtf(interp, encoding, src, srcLen, flags, statePtr, dst, flags, statePtr, dst, dstLen, srcReadPtr, dstWrotePtr, dstCharsPtr); dst[*dstWrotePtr] = '\0'; + return result; } @@ -1029,15 +1254,15 @@ Tcl_ExternalToUtf(interp, encoding, src, srcLen, flags, statePtr, dst, * * Tcl_UtfToExternalDString -- * - * Convert a source buffer from UTF-8 into the specified encoding. - * If any of the bytes in the source buffer are invalid or cannot - * be represented in the target encoding, a default fallback - * character will be substituted. + * Convert a source buffer from UTF-8 into the specified encoding. If any + * of the bytes in the source buffer are invalid or cannot be represented + * in the target encoding, a default fallback character will be + * substituted. * * Results: - * The converted bytes are stored in the DString, which is then - * NULL terminated in an encoding-specific manner. The return value - * is a pointer to the value stored in the DString. + * The converted bytes are stored in the DString, which is then NULL + * terminated in an encoding-specific manner. The return value is a + * pointer to the value stored in the DString. * * Side effects: * None. @@ -1046,20 +1271,20 @@ Tcl_ExternalToUtf(interp, encoding, src, srcLen, flags, statePtr, dst, */ char * -Tcl_UtfToExternalDString(encoding, src, srcLen, dstPtr) - Tcl_Encoding encoding; /* The encoding for the converted string, - * or NULL for the default system encoding. */ - CONST char *src; /* Source string in UTF-8. */ - int srcLen; /* Source string length in bytes, or < 0 for +Tcl_UtfToExternalDString( + Tcl_Encoding encoding, /* The encoding for the converted string, or + * NULL for the default system encoding. */ + const char *src, /* Source string in UTF-8. */ + int srcLen, /* Source string length in bytes, or < 0 for * strlen(). */ - Tcl_DString *dstPtr; /* Uninitialized or free DString in which - * the converted string is stored. */ + Tcl_DString *dstPtr) /* Uninitialized or free DString in which the + * converted string is stored. */ { char *dst; Tcl_EncodingState state; Encoding *encodingPtr; int flags, dstLen, result, soFar, srcRead, dstWrote, dstChars; - + Tcl_DStringInit(dstPtr); dst = Tcl_DStringValue(dstPtr); dstLen = dstPtr->spaceAvl - 1; @@ -1080,13 +1305,15 @@ Tcl_UtfToExternalDString(encoding, src, srcLen, dstPtr) srcLen, flags, &state, dst, dstLen, &srcRead, &dstWrote, &dstChars); soFar = dst + dstWrote - Tcl_DStringValue(dstPtr); + if (result != TCL_CONVERT_NOSPACE) { if (encodingPtr->nullSize == 2) { - Tcl_DStringSetLength(dstPtr, soFar + 1); + Tcl_DStringSetLength(dstPtr, soFar + 1); } Tcl_DStringSetLength(dstPtr, soFar); return Tcl_DStringValue(dstPtr); } + flags &= ~TCL_ENCODING_START; src += srcRead; srcLen -= srcRead; @@ -1108,50 +1335,49 @@ Tcl_UtfToExternalDString(encoding, src, srcLen, dstPtr) * * Results: * The return value is one of TCL_OK, TCL_CONVERT_MULTIBYTE, - * TCL_CONVERT_SYNTAX, TCL_CONVERT_UNKNOWN, or TCL_CONVERT_NOSPACE, - * as documented in tcl.h. + * TCL_CONVERT_SYNTAX, TCL_CONVERT_UNKNOWN, or TCL_CONVERT_NOSPACE, as + * documented in tcl.h. * * Side effects: - * The converted bytes are stored in the output buffer. + * The converted bytes are stored in the output buffer. * *------------------------------------------------------------------------- */ int -Tcl_UtfToExternal(interp, encoding, src, srcLen, flags, statePtr, dst, - dstLen, srcReadPtr, dstWrotePtr, dstCharsPtr) - Tcl_Interp *interp; /* Interp for error return, if not NULL. */ - Tcl_Encoding encoding; /* The encoding for the converted string, - * or NULL for the default system encoding. */ - CONST char *src; /* Source string in UTF-8. */ - int srcLen; /* Source string length in bytes, or < 0 for +Tcl_UtfToExternal( + Tcl_Interp *interp, /* Interp for error return, if not NULL. */ + Tcl_Encoding encoding, /* The encoding for the converted string, or + * NULL for the default system encoding. */ + const char *src, /* Source string in UTF-8. */ + int srcLen, /* Source string length in bytes, or < 0 for * strlen(). */ - int flags; /* Conversion control flags. */ - Tcl_EncodingState *statePtr;/* Place for conversion routine to store - * state information used during a piecewise - * conversion. Contents of statePtr are + int flags, /* Conversion control flags. */ + Tcl_EncodingState *statePtr,/* Place for conversion routine to store state + * information used during a piecewise + * conversion. Contents of statePtr are * initialized and/or reset by conversion * routine under control of flags argument. */ - char *dst; /* Output buffer in which converted string + char *dst, /* Output buffer in which converted string * is stored. */ - int dstLen; /* The maximum length of output buffer in + int dstLen, /* The maximum length of output buffer in * bytes. */ - int *srcReadPtr; /* Filled with the number of bytes from the - * source string that were converted. This - * may be less than the original source length - * if there was a problem converting some - * source characters. */ - int *dstWrotePtr; /* Filled with the number of bytes that were + int *srcReadPtr, /* Filled with the number of bytes from the + * source string that were converted. This may + * be less than the original source length if + * there was a problem converting some source + * characters. */ + int *dstWrotePtr, /* Filled with the number of bytes that were * stored in the output buffer as a result of * the conversion. */ - int *dstCharsPtr; /* Filled with the number of characters that + int *dstCharsPtr) /* Filled with the number of characters that * correspond to the bytes stored in the * output buffer. */ { Encoding *encodingPtr; int result, srcRead, dstWrote, dstChars; Tcl_EncodingState state; - + if (encoding == NULL) { encoding = systemEncoding; } @@ -1184,7 +1410,7 @@ Tcl_UtfToExternal(interp, encoding, src, srcLen, flags, statePtr, dst, dst[*dstWrotePtr + 1] = '\0'; } dst[*dstWrotePtr] = '\0'; - + return result; } @@ -1193,78 +1419,142 @@ Tcl_UtfToExternal(interp, encoding, src, srcLen, flags, statePtr, dst, * * Tcl_FindExecutable -- * - * This procedure computes the absolute path name of the current + * This function computes the absolute path name of the current * application, given its argv[0] value. * * Results: * None. * * Side effects: - * The variable tclExecutableName gets filled in with the file - * name for the application, if we figured it out. If we couldn't - * figure it out, tclExecutableName is set to NULL. + * The absolute pathname for the application is computed and stored to be + * returned later be [info nameofexecutable]. * *--------------------------------------------------------------------------- */ void -Tcl_FindExecutable(argv0) - CONST char *argv0; /* The value of the application's argv[0] +Tcl_FindExecutable( + const char *argv0) /* The value of the application's argv[0] * (native). */ { - int mustCleanUtf; - CONST char *name; - Tcl_DString buffer, nameString; + TclInitSubsystems(); + TclpSetInitialEncodings(); + TclpFindExecutable(argv0); +} + +/* + *--------------------------------------------------------------------------- + * + * OpenEncodingFileChannel -- + * + * Open the file believed to hold data for the encoding, "name". + * + * Results: + * Returns the readable Tcl_Channel from opening the file, or NULL if the + * file could not be successfully opened. If NULL was returned, an error + * message is left in interp's result object, unless interp was NULL. + * + * Side effects: + * Channel may be opened. Information about the filesystem may be cached + * to speed later calls. + * + *--------------------------------------------------------------------------- + */ + +static Tcl_Channel +OpenEncodingFileChannel( + Tcl_Interp *interp, /* Interp for error reporting, if not NULL. */ + const char *name) /* The name of the encoding file on disk and + * also the name for new encoding. */ +{ + Tcl_Obj *nameObj = Tcl_NewStringObj(name, -1); + Tcl_Obj *fileNameObj = Tcl_DuplicateObj(nameObj); + Tcl_Obj *searchPath = Tcl_DuplicateObj(Tcl_GetEncodingSearchPath()); + Tcl_Obj *map = TclGetProcessGlobalValue(&encodingFileMap); + Tcl_Obj **dir, *path, *directory = NULL; + Tcl_Channel chan = NULL; + int i, numDirs; + + Tcl_ListObjGetElements(NULL, searchPath, &numDirs, &dir); + Tcl_IncrRefCount(nameObj); + Tcl_AppendToObj(fileNameObj, ".enc", -1); + Tcl_IncrRefCount(fileNameObj); + Tcl_DictObjGet(NULL, map, nameObj, &directory); - TclInitSubsystems(argv0); + /* + * Check that any cached directory is still on the encoding search path. + */ - if (argv0 == NULL) { - goto done; - } - if (tclExecutableName != NULL) { - ckfree(tclExecutableName); - tclExecutableName = NULL; + if (NULL != directory) { + int verified = 0; + + for (i=0; i<numDirs && !verified; i++) { + if (dir[i] == directory) { + verified = 1; + } + } + if (!verified) { + const char *dirString = Tcl_GetString(directory); + for (i=0; i<numDirs && !verified; i++) { + if (strcmp(dirString, Tcl_GetString(dir[i])) == 0) { + verified = 1; + } + } + } + if (!verified) { + /* + * Directory no longer on the search path. Remove from cache. + */ + + map = Tcl_DuplicateObj(map); + Tcl_DictObjRemove(NULL, map, nameObj); + TclSetProcessGlobalValue(&encodingFileMap, map, NULL); + directory = NULL; + } } - if ((name = TclpFindExecutable(argv0)) == NULL) { - goto done; + + if (NULL != directory) { + /* + * Got a directory from the cache. Try to use it first. + */ + + Tcl_IncrRefCount(directory); + path = Tcl_FSJoinToPath(directory, 1, &fileNameObj); + Tcl_IncrRefCount(path); + Tcl_DecrRefCount(directory); + chan = Tcl_FSOpenFileChannel(NULL, path, "r", 0); + Tcl_DecrRefCount(path); } /* - * The value returned from TclpNameOfExecutable is a UTF string that - * is possibly dirty depending on when it was initialized. - * TclFindEncodings will indicate whether we must "clean" the UTF (as - * reported by the underlying system). To assure that the UTF string - * is a properly encoded native string for this system, convert the - * UTF string to the default native encoding before the default - * encoding is initialized. Then, convert it back to UTF after the - * system encoding is loaded. + * Scan the search path until we find it. */ - - Tcl_UtfToExternalDString(NULL, name, -1, &buffer); - mustCleanUtf = TclFindEncodings(argv0); - /* - * Now it is OK to convert the native string back to UTF and set - * the value of the tclExecutableName. - */ - - if (mustCleanUtf) { - Tcl_ExternalToUtfDString(NULL, Tcl_DStringValue(&buffer), -1, - &nameString); - tclExecutableName = (char *) - ckalloc((unsigned) (Tcl_DStringLength(&nameString) + 1)); - strcpy(tclExecutableName, Tcl_DStringValue(&nameString)); - - Tcl_DStringFree(&nameString); - } else { - tclExecutableName = (char *) ckalloc((unsigned) (strlen(name) + 1)); - strcpy(tclExecutableName, name); - } - Tcl_DStringFree(&buffer); - return; - - done: - (void) TclFindEncodings(argv0); + for (i=0; i<numDirs && (chan == NULL); i++) { + path = Tcl_FSJoinToPath(dir[i], 1, &fileNameObj); + Tcl_IncrRefCount(path); + chan = Tcl_FSOpenFileChannel(NULL, path, "r", 0); + Tcl_DecrRefCount(path); + if (chan != NULL) { + /* + * Save directory in the cache. + */ + + map = Tcl_DuplicateObj(TclGetProcessGlobalValue(&encodingFileMap)); + Tcl_DictObjPut(NULL, map, nameObj, dir[i]); + TclSetProcessGlobalValue(&encodingFileMap, map, NULL); + } + } + + if ((NULL == chan) && (interp != NULL)) { + Tcl_AppendResult(interp, "unknown encoding \"", name, "\"", NULL); + Tcl_SetErrorCode(interp, "TCL", "LOOKUP", "ENCODING", name, NULL); + } + Tcl_DecrRefCount(fileNameObj); + Tcl_DecrRefCount(nameObj); + Tcl_DecrRefCount(searchPath); + + return chan; } /* @@ -1272,50 +1562,34 @@ Tcl_FindExecutable(argv0) * * LoadEncodingFile -- * - * Read a file that describes an encoding and create a new Encoding - * from the data. + * Read a file that describes an encoding and create a new Encoding from + * the data. * * Results: - * The return value is the newly loaded Encoding, or NULL if - * the file didn't exist of was in the incorrect format. If NULL was - * returned, an error message is left in interp's result object, - * unless interp was NULL. + * The return value is the newly loaded Encoding, or NULL if the file + * didn't exist of was in the incorrect format. If NULL was returned, an + * error message is left in interp's result object, unless interp was + * NULL. * * Side effects: - * File read from disk. + * File read from disk. * *--------------------------------------------------------------------------- */ static Tcl_Encoding -LoadEncodingFile(interp, name) - Tcl_Interp *interp; /* Interp for error reporting, if not NULL. */ - CONST char *name; /* The name of the encoding file on disk - * and also the name for new encoding. */ +LoadEncodingFile( + Tcl_Interp *interp, /* Interp for error reporting, if not NULL. */ + const char *name) /* The name of the encoding file on disk and + * also the name for new encoding. */ { - int objc, i, ch; - Tcl_Obj **objv; - Tcl_Obj *pathPtr; - Tcl_Channel chan; - Tcl_Encoding encoding; - - pathPtr = TclGetLibraryPath(); - if (pathPtr == NULL) { - goto unknown; - } - objc = 0; - Tcl_ListObjGetElements(NULL, pathPtr, &objc, &objv); - - chan = NULL; - for (i = 0; i < objc; i++) { - chan = OpenEncodingFile(Tcl_GetString(objv[i]), name); - if (chan != NULL) { - break; - } - } + Tcl_Channel chan = NULL; + Tcl_Encoding encoding = NULL; + int ch; + chan = OpenEncodingFileChannel(interp, name); if (chan == NULL) { - goto unknown; + return NULL; } Tcl_SetChannelOption(NULL, chan, "-encoding", "utf-8"); @@ -1332,89 +1606,26 @@ LoadEncodingFile(interp, name) } } - encoding = NULL; switch (ch) { - case 'S': { - encoding = LoadTableEncoding(interp, name, ENCODING_SINGLEBYTE, - chan); - break; - } - case 'D': { - encoding = LoadTableEncoding(interp, name, ENCODING_DOUBLEBYTE, - chan); - break; - } - case 'M': { - encoding = LoadTableEncoding(interp, name, ENCODING_MULTIBYTE, - chan); - break; - } - case 'E': { - encoding = LoadEscapeEncoding(name, chan); - break; - } + case 'S': + encoding = LoadTableEncoding(name, ENCODING_SINGLEBYTE, chan); + break; + case 'D': + encoding = LoadTableEncoding(name, ENCODING_DOUBLEBYTE, chan); + break; + case 'M': + encoding = LoadTableEncoding(name, ENCODING_MULTIBYTE, chan); + break; + case 'E': + encoding = LoadEscapeEncoding(name, chan); + break; } if ((encoding == NULL) && (interp != NULL)) { Tcl_AppendResult(interp, "invalid encoding file \"", name, "\"", NULL); - if (ch == 'E') { - Tcl_AppendResult(interp, " or missing sub-encoding", NULL); - } } Tcl_Close(NULL, chan); - return encoding; - - unknown: - if (interp != NULL) { - Tcl_AppendResult(interp, "unknown encoding \"", name, "\"", NULL); - } - return NULL; -} - -/* - *---------------------------------------------------------------------- - * - * OpenEncodingFile -- - * - * Look for the file encoding/<name>.enc in the specified - * directory. - * - * Results: - * Returns an open file channel if the file exists. - * - * Side effects: - * None. - * - *---------------------------------------------------------------------- - */ -static Tcl_Channel -OpenEncodingFile(dir, name) - CONST char *dir; - CONST char *name; - -{ - CONST char *argv[3]; - Tcl_DString pathString; - CONST char *path; - Tcl_Channel chan; - Tcl_Obj *pathPtr; - - argv[0] = dir; - argv[1] = "encoding"; - argv[2] = name; - - Tcl_DStringInit(&pathString); - Tcl_JoinPath(3, argv, &pathString); - path = Tcl_DStringAppend(&pathString, ".enc", -1); - pathPtr = Tcl_NewStringObj(path,-1); - - Tcl_IncrRefCount(pathPtr); - chan = Tcl_FSOpenFileChannel(NULL, pathPtr, "r", 0); - Tcl_DecrRefCount(pathPtr); - - Tcl_DStringFree(&pathString); - - return chan; + return encoding; } /* @@ -1422,17 +1633,17 @@ OpenEncodingFile(dir, name) * * LoadTableEncoding -- * - * Helper function for LoadEncodingTable(). Loads a table to that - * converts between Unicode and some other encoding and creates an + * Helper function for LoadEncodingTable(). Loads a table to that + * converts between Unicode and some other encoding and creates an * encoding (using a TableEncoding structure) from that information. * - * File contains binary data, but begins with a marker to indicate - * byte-ordering, so that same binary file can be read on either - * endian platforms. + * File contains binary data, but begins with a marker to indicate + * byte-ordering, so that same binary file can be read on either endian + * platforms. * * Results: - * The return value is the new encoding, or NULL if the encoding - * could not be created (because the file contained invalid data). + * The return value is the new encoding, or NULL if the encoding could + * not be created (because the file contained invalid data). * * Side effects: * None. @@ -1441,11 +1652,10 @@ OpenEncodingFile(dir, name) */ static Tcl_Encoding -LoadTableEncoding(interp, name, type, chan) - Tcl_Interp *interp; /* Interp for temporary obj while reading. */ - CONST char *name; /* Name for new encoding. */ - int type; /* Type of encoding (ENCODING_?????). */ - Tcl_Channel chan; /* File containing new encoding. */ +LoadTableEncoding( + const char *name, /* Name for new encoding. */ + int type, /* Type of encoding (ENCODING_?????). */ + Tcl_Channel chan) /* File containing new encoding. */ { Tcl_DString lineString; Tcl_Obj *objPtr; @@ -1462,7 +1672,7 @@ LoadTableEncoding(interp, name, type, chan) * sequences in the encoding files. */ - static CONST char staticHex[] = { + static const char staticHex[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0 ... 15 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 16 ... 31 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 32 ... 47 */ @@ -1507,9 +1717,9 @@ LoadTableEncoding(interp, name, type, chan) dataPtr->fallback = fallback; /* - * Read the table that maps characters to Unicode. Performs a single - * malloc to get the memory for the array and all the pages needed by - * the array. + * Read the table that maps characters to Unicode. Performs a single + * malloc to get the memory for the array and all the pages needed by the + * array. */ size = 256 * sizeof(unsigned short *) + numPages * PAGESIZE; @@ -1517,26 +1727,23 @@ LoadTableEncoding(interp, name, type, chan) memset(dataPtr->toUnicode, 0, size); pageMemPtr = (unsigned short *) (dataPtr->toUnicode + 256); - if (interp == NULL) { - objPtr = Tcl_NewObj(); - } else { - objPtr = Tcl_GetObjResult(interp); - } + TclNewObj(objPtr); + Tcl_IncrRefCount(objPtr); for (i = 0; i < numPages; i++) { int ch; char *p; Tcl_ReadChars(chan, objPtr, 3 + 16 * (16 * 4 + 1), 0); p = Tcl_GetString(objPtr); - hi = (staticHex[(unsigned int)p[0]] << 4) + staticHex[(unsigned int)p[1]]; + hi = (staticHex[UCHAR(p[0])] << 4) + staticHex[UCHAR(p[1])]; dataPtr->toUnicode[hi] = pageMemPtr; p += 2; for (lo = 0; lo < 256; lo++) { if ((lo & 0x0f) == 0) { p++; } - ch = (staticHex[(unsigned int)p[0]] << 12) + (staticHex[(unsigned int)p[1]] << 8) - + (staticHex[(unsigned int)p[2]] << 4) + staticHex[(unsigned int)p[3]]; + ch = (staticHex[UCHAR(p[0])] << 12) + (staticHex[UCHAR(p[1])] << 8) + + (staticHex[UCHAR(p[2])] << 4) + staticHex[UCHAR(p[3])]; if (ch != 0) { used[ch >> 8] = 1; } @@ -1545,12 +1752,8 @@ LoadTableEncoding(interp, name, type, chan) p += 4; } } - if (interp == NULL) { - Tcl_DecrRefCount(objPtr); - } else { - Tcl_ResetResult(interp); - } - + TclDecrRefCount(objPtr); + if (type == ENCODING_DOUBLEBYTE) { memset(dataPtr->prefixBytes, 1, sizeof(dataPtr->prefixBytes)); } else { @@ -1562,10 +1765,10 @@ LoadTableEncoding(interp, name, type, chan) } /* - * Invert toUnicode array to produce the fromUnicode array. Performs a - * single malloc to get the memory for the array and all the pages - * needed by the array. While reading in the toUnicode array, we - * remembered what pages that would be needed for the fromUnicode array. + * Invert toUnicode array to produce the fromUnicode array. Performs a + * single malloc to get the memory for the array and all the pages needed + * by the array. While reading in the toUnicode array, we remembered what + * pages that would be needed for the fromUnicode array. */ if (symbol) { @@ -1592,7 +1795,7 @@ LoadTableEncoding(interp, name, type, chan) ch = dataPtr->toUnicode[hi][lo]; if (ch != 0) { unsigned short *page; - + page = dataPtr->fromUnicode[ch >> 8]; if (page == NULL) { page = pageMemPtr; @@ -1607,7 +1810,7 @@ LoadTableEncoding(interp, name, type, chan) if (type == ENCODING_MULTIBYTE) { /* * If multibyte encodings don't have a backslash character, define - * one. Otherwise, on Windows, native file names won't work because + * one. Otherwise, on Windows, native file names won't work because * the backslash in the file name will map to the unknown character * (question mark) when converting from UTF-8 to external encoding. */ @@ -1620,16 +1823,15 @@ LoadTableEncoding(interp, name, type, chan) } if (symbol) { unsigned short *page; - + /* * Make a special symbol encoding that not only maps the symbol * characters from their Unicode code points down into page 0, but - * also ensure that the characters on page 0 map to themselves. - * This is so that a symbol font can be used to display a simple - * string like "abcd" and have alpha, beta, chi, delta show up, - * rather than have "unknown" chars show up because strictly - * speaking the symbol font doesn't have glyphs for those low ascii - * chars. + * also ensure that the characters on page 0 map to themselves. This + * is so that a symbol font can be used to display a simple string + * like "abcd" and have alpha, beta, chi, delta show up, rather than + * have "unknown" chars show up because strictly speaking the symbol + * font doesn't have glyphs for those low ascii chars. */ page = dataPtr->fromUnicode[0]; @@ -1648,15 +1850,23 @@ LoadTableEncoding(interp, name, type, chan) dataPtr->fromUnicode[hi] = emptyPage; } } + /* - * For trailing 'R'everse encoding, see [Patch #689341] + * For trailing 'R'everse encoding, see [Patch 689341] */ + Tcl_DStringInit(&lineString); do { int len; - /* skip leading empty lines */ - while ((len = Tcl_Gets(chan, &lineString)) == 0) - ; + + /* + * Skip leading empty lines. + */ + + while ((len = Tcl_Gets(chan, &lineString)) == 0) { + /* empty body */ + } + if (len < 0) { break; } @@ -1665,16 +1875,17 @@ LoadTableEncoding(interp, name, type, chan) break; } for (Tcl_DStringSetLength(&lineString, 0); - (len = Tcl_Gets(chan, &lineString)) >= 0; - Tcl_DStringSetLength(&lineString, 0)) { + (len = Tcl_Gets(chan, &lineString)) >= 0; + Tcl_DStringSetLength(&lineString, 0)) { unsigned char* p; int to, from; + if (len < 5) { continue; } p = (unsigned char*) Tcl_DStringValue(&lineString); to = (staticHex[p[0]] << 12) + (staticHex[p[1]] << 8) - + (staticHex[p[2]] << 4) + staticHex[p[3]]; + + (staticHex[p[2]] << 4) + staticHex[p[3]]; if (to == 0) { continue; } @@ -1696,6 +1907,7 @@ LoadTableEncoding(interp, name, type, chan) encType.freeProc = TableFreeProc; encType.nullSize = (type == ENCODING_DOUBLEBYTE) ? 2 : 1; encType.clientData = (ClientData) dataPtr; + return Tcl_CreateEncoding(&encType); } @@ -1704,16 +1916,16 @@ LoadTableEncoding(interp, name, type, chan) * * LoadEscapeEncoding -- * - * Helper function for LoadEncodingTable(). Loads a state machine - * that converts between Unicode and some other encoding. + * Helper function for LoadEncodingTable(). Loads a state machine that + * converts between Unicode and some other encoding. * - * File contains text data that describes the escape sequences that - * are used to choose an encoding and the associated names for the + * File contains text data that describes the escape sequences that are + * used to choose an encoding and the associated names for the * sub-encodings. * * Results: - * The return value is the new encoding, or NULL if the encoding - * could not be created (because the file contained invalid data). + * The return value is the new encoding, or NULL if the encoding could + * not be created (because the file contained invalid data). * * Side effects: * None. @@ -1722,11 +1934,11 @@ LoadTableEncoding(interp, name, type, chan) */ static Tcl_Encoding -LoadEscapeEncoding(name, chan) - CONST char *name; /* Name for new encoding. */ - Tcl_Channel chan; /* File containing new encoding. */ +LoadEscapeEncoding( + const char *name, /* Name for new encoding. */ + Tcl_Channel chan) /* File containing new encoding. */ { - int i, missingSubEncoding = 0; + int i; unsigned int size; Tcl_DString escapeData; char init[16], final[16]; @@ -1739,21 +1951,21 @@ LoadEscapeEncoding(name, chan) while (1) { int argc; - CONST char **argv; + const char **argv; char *line; Tcl_DString lineString; - + Tcl_DStringInit(&lineString); if (Tcl_Gets(chan, &lineString) < 0) { break; } line = Tcl_DStringValue(&lineString); - if (Tcl_SplitList(NULL, line, &argc, &argv) != TCL_OK) { + if (Tcl_SplitList(NULL, line, &argc, &argv) != TCL_OK) { continue; } if (argc >= 2) { if (strcmp(argv[0], "name") == 0) { - ; + /* do nothing */ } else if (strcmp(argv[0], "init") == 0) { strncpy(init, argv[1], sizeof(init)); init[sizeof(init) - 1] = '\0'; @@ -1762,6 +1974,7 @@ LoadEscapeEncoding(name, chan) final[sizeof(final) - 1] = '\0'; } else { EscapeSubTable est; + Encoding *e; strncpy(est.sequence, argv[1], sizeof(est.sequence)); est.sequence[sizeof(est.sequence) - 1] = '\0'; @@ -1771,36 +1984,33 @@ LoadEscapeEncoding(name, chan) est.name[sizeof(est.name) - 1] = '\0'; /* - * Load the subencodings first so we're never stuck - * trying to use a half-loaded system encoding to - * open/read a *.enc file. + * To avoid infinite recursion in [encoding system iso2022-*] */ - est.encodingPtr = (Encoding *) Tcl_GetEncoding(NULL, est.name); - if ((est.encodingPtr == NULL) - || (est.encodingPtr->toUtfProc != TableToUtfProc)) { - missingSubEncoding = 1; + e = (Encoding *) Tcl_GetEncoding(NULL, est.name); + if (e && e->toUtfProc != TableToUtfProc && + e->toUtfProc != Iso88591ToUtfProc) { + Tcl_FreeEncoding((Tcl_Encoding) e); + e = NULL; } + est.encodingPtr = e; Tcl_DStringAppend(&escapeData, (char *) &est, sizeof(est)); } } ckfree((char *) argv); Tcl_DStringFree(&lineString); } - if (missingSubEncoding) { - Tcl_DStringFree(&escapeData); - return NULL; - } - size = sizeof(EscapeEncodingData) - - sizeof(EscapeSubTable) + Tcl_DStringLength(&escapeData); + size = sizeof(EscapeEncodingData) - sizeof(EscapeSubTable) + + Tcl_DStringLength(&escapeData); dataPtr = (EscapeEncodingData *) ckalloc(size); dataPtr->initLen = strlen(init); strcpy(dataPtr->init, init); dataPtr->finalLen = strlen(final); strcpy(dataPtr->final, final); - dataPtr->numSubTables = Tcl_DStringLength(&escapeData) / sizeof(EscapeSubTable); - memcpy((VOID *) dataPtr->subTables, (VOID *) Tcl_DStringValue(&escapeData), + dataPtr->numSubTables = + Tcl_DStringLength(&escapeData) / sizeof(EscapeSubTable); + memcpy(dataPtr->subTables, Tcl_DStringValue(&escapeData), (size_t) Tcl_DStringLength(&escapeData)); Tcl_DStringFree(&escapeData); @@ -1830,9 +2040,9 @@ LoadEscapeEncoding(name, chan) * * BinaryProc -- * - * The default conversion when no other conversion is specified. - * No translation is done; source bytes are copied directly to - * destination bytes. + * The default conversion when no other conversion is specified. No + * translation is done; source bytes are copied directly to destination + * bytes. * * Results: * Returns TCL_OK if conversion was successful. @@ -1844,27 +2054,26 @@ LoadEscapeEncoding(name, chan) */ static int -BinaryProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, - srcReadPtr, dstWrotePtr, dstCharsPtr) - ClientData clientData; /* Not used. */ - CONST char *src; /* Source string (unknown encoding). */ - int srcLen; /* Source string length in bytes. */ - int flags; /* Conversion control flags. */ - Tcl_EncodingState *statePtr;/* Place for conversion routine to store - * state information used during a piecewise - * conversion. Contents of statePtr are +BinaryProc( + ClientData clientData, /* Not used. */ + const char *src, /* Source string (unknown encoding). */ + int srcLen, /* Source string length in bytes. */ + int flags, /* Conversion control flags. */ + Tcl_EncodingState *statePtr,/* Place for conversion routine to store state + * information used during a piecewise + * conversion. Contents of statePtr are * initialized and/or reset by conversion * routine under control of flags argument. */ - char *dst; /* Output buffer in which converted string - * is stored. */ - int dstLen; /* The maximum length of output buffer in + char *dst, /* Output buffer in which converted string is + * stored. */ + int dstLen, /* The maximum length of output buffer in * bytes. */ - int *srcReadPtr; /* Filled with the number of bytes from the + int *srcReadPtr, /* Filled with the number of bytes from the * source string that were converted. */ - int *dstWrotePtr; /* Filled with the number of bytes that were + int *dstWrotePtr, /* Filled with the number of bytes that were * stored in the output buffer as a result of * the conversion. */ - int *dstCharsPtr; /* Filled with the number of characters that + int *dstCharsPtr) /* Filled with the number of characters that * correspond to the bytes stored in the * output buffer. */ { @@ -1883,18 +2092,17 @@ BinaryProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, *srcReadPtr = srcLen; *dstWrotePtr = srcLen; *dstCharsPtr = srcLen; - memcpy((void *) dst, (void *) src, (size_t) srcLen); + memcpy(dst, src, (size_t) srcLen); return result; } - /* *------------------------------------------------------------------------- * * UtfExtToUtfIntProc -- * - * Convert from UTF-8 to UTF-8. While converting null-bytes from - * the Tcl's internal representation (0xc0, 0x80) to the official + * Convert from UTF-8 to UTF-8. While converting null-bytes from the + * Tcl's internal representation (0xc0, 0x80) to the official * representation (0x00). See UtfToUtfProc for details. * * Results: @@ -1905,36 +2113,36 @@ BinaryProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, * *------------------------------------------------------------------------- */ -static int -UtfIntToUtfExtProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, - srcReadPtr, dstWrotePtr, dstCharsPtr) - ClientData clientData; /* Not used. */ - CONST char *src; /* Source string in UTF-8. */ - int srcLen; /* Source string length in bytes. */ - int flags; /* Conversion control flags. */ - Tcl_EncodingState *statePtr;/* Place for conversion routine to store - * state information used during a piecewise - * conversion. Contents of statePtr are + +static int +UtfIntToUtfExtProc( + ClientData clientData, /* Not used. */ + const char *src, /* Source string in UTF-8. */ + int srcLen, /* Source string length in bytes. */ + int flags, /* Conversion control flags. */ + Tcl_EncodingState *statePtr,/* Place for conversion routine to store state + * information used during a piecewise + * conversion. Contents of statePtr are * initialized and/or reset by conversion * routine under control of flags argument. */ - char *dst; /* Output buffer in which converted string + char *dst, /* Output buffer in which converted string * is stored. */ - int dstLen; /* The maximum length of output buffer in + int dstLen, /* The maximum length of output buffer in * bytes. */ - int *srcReadPtr; /* Filled with the number of bytes from the - * source string that were converted. This - * may be less than the original source length - * if there was a problem converting some - * source characters. */ - int *dstWrotePtr; /* Filled with the number of bytes that were + int *srcReadPtr, /* Filled with the number of bytes from the + * source string that were converted. This may + * be less than the original source length if + * there was a problem converting some source + * characters. */ + int *dstWrotePtr, /* Filled with the number of bytes that were * stored in the output buffer as a result of * the conversion. */ - int *dstCharsPtr; /* Filled with the number of characters that + int *dstCharsPtr) /* Filled with the number of characters that * correspond to the bytes stored in the * output buffer. */ { return UtfToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, - srcReadPtr, dstWrotePtr, dstCharsPtr, 1); + srcReadPtr, dstWrotePtr, dstCharsPtr, 1); } /* @@ -1942,9 +2150,9 @@ UtfIntToUtfExtProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, * * UtfExtToUtfIntProc -- * - * Convert from UTF-8 to UTF-8 while converting null-bytes from - * the official representation (0x00) to Tcl's internal - * representation (0xc0, 0x80). See UtfToUtfProc for details. + * Convert from UTF-8 to UTF-8 while converting null-bytes from the + * official representation (0x00) to Tcl's internal representation (0xc0, + * 0x80). See UtfToUtfProc for details. * * Results: * Returns TCL_OK if conversion was successful. @@ -1954,36 +2162,35 @@ UtfIntToUtfExtProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, * *------------------------------------------------------------------------- */ -static int -UtfExtToUtfIntProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, - srcReadPtr, dstWrotePtr, dstCharsPtr) - ClientData clientData; /* Not used. */ - CONST char *src; /* Source string in UTF-8. */ - int srcLen; /* Source string length in bytes. */ - int flags; /* Conversion control flags. */ - Tcl_EncodingState *statePtr;/* Place for conversion routine to store - * state information used during a piecewise - * conversion. Contents of statePtr are +static int +UtfExtToUtfIntProc( + ClientData clientData, /* Not used. */ + const char *src, /* Source string in UTF-8. */ + int srcLen, /* Source string length in bytes. */ + int flags, /* Conversion control flags. */ + Tcl_EncodingState *statePtr,/* Place for conversion routine to store state + * information used during a piecewise + * conversion. Contents of statePtr are * initialized and/or reset by conversion * routine under control of flags argument. */ - char *dst; /* Output buffer in which converted string - * is stored. */ - int dstLen; /* The maximum length of output buffer in + char *dst, /* Output buffer in which converted string is + * stored. */ + int dstLen, /* The maximum length of output buffer in * bytes. */ - int *srcReadPtr; /* Filled with the number of bytes from the - * source string that were converted. This - * may be less than the original source length - * if there was a problem converting some - * source characters. */ - int *dstWrotePtr; /* Filled with the number of bytes that were + int *srcReadPtr, /* Filled with the number of bytes from the + * source string that were converted. This may + * be less than the original source length if + * there was a problem converting some source + * characters. */ + int *dstWrotePtr, /* Filled with the number of bytes that were * stored in the output buffer as a result of * the conversion. */ - int *dstCharsPtr; /* Filled with the number of characters that + int *dstCharsPtr) /* Filled with the number of characters that * correspond to the bytes stored in the * output buffer. */ { return UtfToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, - srcReadPtr, dstWrotePtr, dstCharsPtr, 0); + srcReadPtr, dstWrotePtr, dstCharsPtr, 0); } /* @@ -1991,9 +2198,9 @@ UtfExtToUtfIntProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, * * UtfToUtfProc -- * - * Convert from UTF-8 to UTF-8. Note that the UTF-8 to UTF-8 - * translation is not a no-op, because it will turn a stream of - * improperly formed UTF-8 into a properly formed stream. + * Convert from UTF-8 to UTF-8. Note that the UTF-8 to UTF-8 translation + * is not a no-op, because it will turn a stream of improperly formed + * UTF-8 into a properly formed stream. * * Results: * Returns TCL_OK if conversion was successful. @@ -2004,45 +2211,43 @@ UtfExtToUtfIntProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, *------------------------------------------------------------------------- */ -static int -UtfToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, - srcReadPtr, dstWrotePtr, dstCharsPtr, pureNullMode) - ClientData clientData; /* Not used. */ - CONST char *src; /* Source string in UTF-8. */ - int srcLen; /* Source string length in bytes. */ - int flags; /* Conversion control flags. */ - Tcl_EncodingState *statePtr;/* Place for conversion routine to store - * state information used during a piecewise - * conversion. Contents of statePtr are +static int +UtfToUtfProc( + ClientData clientData, /* Not used. */ + const char *src, /* Source string in UTF-8. */ + int srcLen, /* Source string length in bytes. */ + int flags, /* Conversion control flags. */ + Tcl_EncodingState *statePtr,/* Place for conversion routine to store state + * information used during a piecewise + * conversion. Contents of statePtr are * initialized and/or reset by conversion * routine under control of flags argument. */ - char *dst; /* Output buffer in which converted string - * is stored. */ - int dstLen; /* The maximum length of output buffer in + char *dst, /* Output buffer in which converted string is + * stored. */ + int dstLen, /* The maximum length of output buffer in * bytes. */ - int *srcReadPtr; /* Filled with the number of bytes from the - * source string that were converted. This - * may be less than the original source length - * if there was a problem converting some - * source characters. */ - int *dstWrotePtr; /* Filled with the number of bytes that were + int *srcReadPtr, /* Filled with the number of bytes from the + * source string that were converted. This may + * be less than the original source length if + * there was a problem converting some source + * characters. */ + int *dstWrotePtr, /* Filled with the number of bytes that were * stored in the output buffer as a result of * the conversion. */ - int *dstCharsPtr; /* Filled with the number of characters that + int *dstCharsPtr, /* Filled with the number of characters that * correspond to the bytes stored in the * output buffer. */ - int pureNullMode; /* Convert embedded nulls from - * internal representation to real - * null-bytes or vice versa */ - + int pureNullMode) /* Convert embedded nulls from internal + * representation to real null-bytes or vice + * versa. */ { - CONST char *srcStart, *srcEnd, *srcClose; + const char *srcStart, *srcEnd, *srcClose; char *dstStart, *dstEnd; int result, numChars; Tcl_UniChar ch; result = TCL_OK; - + srcStart = src; srcEnd = src + srcLen; srcClose = srcEnd; @@ -2067,26 +2272,26 @@ UtfToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, result = TCL_CONVERT_NOSPACE; break; } - if (UCHAR(*src) < 0x80 && - !(UCHAR(*src) == 0 && pureNullMode == 0)) { + if (UCHAR(*src) < 0x80 && !(UCHAR(*src) == 0 && pureNullMode == 0)) { /* - * Copy 7bit chatacters, but skip null-bytes when we are - * in input mode, so that they get converted to 0xc080. + * Copy 7bit chatacters, but skip null-bytes when we are in input + * mode, so that they get converted to 0xc080. */ + *dst++ = *src++; - } else if (pureNullMode == 1 && - UCHAR(*src) == 0xc0 && - UCHAR(*(src+1)) == 0x80) { - /* + } else if (pureNullMode == 1 && UCHAR(*src) == 0xc0 && + UCHAR(*(src+1)) == 0x80) { + /* * Convert 0xc080 to real nulls when we are in output mode. */ + *dst++ = 0; src += 2; } else if (!Tcl_UtfCharComplete(src, srcEnd - src)) { - /* Always check before using Tcl_UtfToUniChar. Not doing - * can so cause it run beyond the endof the buffer! If we - * happen such an incomplete char its bytes are made to - * represent themselves. + /* + * Always check before using Tcl_UtfToUniChar. Not doing can so + * cause it run beyond the endof the buffer! If we happen such an + * incomplete char its byts are made to represent themselves. */ ch = (unsigned char) *src; @@ -2098,7 +2303,7 @@ UtfToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, } } - *srcReadPtr = src - srcStart; + *srcReadPtr = src - srcStart; *dstWrotePtr = dst - dstStart; *dstCharsPtr = numChars; return result; @@ -2120,35 +2325,34 @@ UtfToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, *------------------------------------------------------------------------- */ -static int -UnicodeToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, - srcReadPtr, dstWrotePtr, dstCharsPtr) - ClientData clientData; /* Not used. */ - CONST char *src; /* Source string in Unicode. */ - int srcLen; /* Source string length in bytes. */ - int flags; /* Conversion control flags. */ - Tcl_EncodingState *statePtr;/* Place for conversion routine to store - * state information used during a piecewise - * conversion. Contents of statePtr are +static int +UnicodeToUtfProc( + ClientData clientData, /* Not used. */ + const char *src, /* Source string in Unicode. */ + int srcLen, /* Source string length in bytes. */ + int flags, /* Conversion control flags. */ + Tcl_EncodingState *statePtr,/* Place for conversion routine to store state + * information used during a piecewise + * conversion. Contents of statePtr are * initialized and/or reset by conversion * routine under control of flags argument. */ - char *dst; /* Output buffer in which converted string - * is stored. */ - int dstLen; /* The maximum length of output buffer in + char *dst, /* Output buffer in which converted string is + * stored. */ + int dstLen, /* The maximum length of output buffer in * bytes. */ - int *srcReadPtr; /* Filled with the number of bytes from the - * source string that were converted. This - * may be less than the original source length - * if there was a problem converting some - * source characters. */ - int *dstWrotePtr; /* Filled with the number of bytes that were + int *srcReadPtr, /* Filled with the number of bytes from the + * source string that were converted. This may + * be less than the original source length if + * there was a problem converting some source + * characters. */ + int *dstWrotePtr, /* Filled with the number of bytes that were * stored in the output buffer as a result of * the conversion. */ - int *dstCharsPtr; /* Filled with the number of characters that + int *dstCharsPtr) /* Filled with the number of characters that * correspond to the bytes stored in the * output buffer. */ { - CONST char *srcStart, *srcEnd; + const char *srcStart, *srcEnd; char *dstEnd, *dstStart; int result, numChars; Tcl_UniChar ch; @@ -2206,35 +2410,35 @@ UnicodeToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, *------------------------------------------------------------------------- */ -static int -UtfToUnicodeProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, - srcReadPtr, dstWrotePtr, dstCharsPtr) - ClientData clientData; /* TableEncodingData that specifies encoding. */ - CONST char *src; /* Source string in UTF-8. */ - int srcLen; /* Source string length in bytes. */ - int flags; /* Conversion control flags. */ - Tcl_EncodingState *statePtr;/* Place for conversion routine to store - * state information used during a piecewise - * conversion. Contents of statePtr are +static int +UtfToUnicodeProc( + ClientData clientData, /* TableEncodingData that specifies + * encoding. */ + const char *src, /* Source string in UTF-8. */ + int srcLen, /* Source string length in bytes. */ + int flags, /* Conversion control flags. */ + Tcl_EncodingState *statePtr,/* Place for conversion routine to store state + * information used during a piecewise + * conversion. Contents of statePtr are * initialized and/or reset by conversion * routine under control of flags argument. */ - char *dst; /* Output buffer in which converted string - * is stored. */ - int dstLen; /* The maximum length of output buffer in + char *dst, /* Output buffer in which converted string is + * stored. */ + int dstLen, /* The maximum length of output buffer in * bytes. */ - int *srcReadPtr; /* Filled with the number of bytes from the - * source string that were converted. This - * may be less than the original source length - * if there was a problem converting some - * source characters. */ - int *dstWrotePtr; /* Filled with the number of bytes that were + int *srcReadPtr, /* Filled with the number of bytes from the + * source string that were converted. This may + * be less than the original source length if + * there was a problem converting some source + * characters. */ + int *dstWrotePtr, /* Filled with the number of bytes that were * stored in the output buffer as a result of * the conversion. */ - int *dstCharsPtr; /* Filled with the number of characters that + int *dstCharsPtr) /* Filled with the number of characters that * correspond to the bytes stored in the * output buffer. */ { - CONST char *srcStart, *srcEnd, *srcClose, *dstStart, *dstEnd; + const char *srcStart, *srcEnd, *srcClose, *dstStart, *dstEnd; int result, numChars; Tcl_UniChar ch; @@ -2300,43 +2504,42 @@ UtfToUnicodeProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, *------------------------------------------------------------------------- */ -static int -TableToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, - srcReadPtr, dstWrotePtr, dstCharsPtr) - ClientData clientData; /* TableEncodingData that specifies +static int +TableToUtfProc( + ClientData clientData, /* TableEncodingData that specifies * encoding. */ - CONST char *src; /* Source string in specified encoding. */ - int srcLen; /* Source string length in bytes. */ - int flags; /* Conversion control flags. */ - Tcl_EncodingState *statePtr;/* Place for conversion routine to store - * state information used during a piecewise - * conversion. Contents of statePtr are + const char *src, /* Source string in specified encoding. */ + int srcLen, /* Source string length in bytes. */ + int flags, /* Conversion control flags. */ + Tcl_EncodingState *statePtr,/* Place for conversion routine to store state + * information used during a piecewise + * conversion. Contents of statePtr are * initialized and/or reset by conversion * routine under control of flags argument. */ - char *dst; /* Output buffer in which converted string - * is stored. */ - int dstLen; /* The maximum length of output buffer in + char *dst, /* Output buffer in which converted string is + * stored. */ + int dstLen, /* The maximum length of output buffer in * bytes. */ - int *srcReadPtr; /* Filled with the number of bytes from the - * source string that were converted. This - * may be less than the original source length - * if there was a problem converting some - * source characters. */ - int *dstWrotePtr; /* Filled with the number of bytes that were + int *srcReadPtr, /* Filled with the number of bytes from the + * source string that were converted. This may + * be less than the original source length if + * there was a problem converting some source + * characters. */ + int *dstWrotePtr, /* Filled with the number of bytes that were * stored in the output buffer as a result of * the conversion. */ - int *dstCharsPtr; /* Filled with the number of characters that + int *dstCharsPtr) /* Filled with the number of characters that * correspond to the bytes stored in the * output buffer. */ { - CONST char *srcStart, *srcEnd; + const char *srcStart, *srcEnd; char *dstEnd, *dstStart, *prefixBytes; int result, byte, numChars; Tcl_UniChar ch; unsigned short **toUnicode; unsigned short *pageZero; TableEncodingData *dataPtr; - + srcStart = src; srcEnd = src + srcLen; @@ -2350,10 +2553,10 @@ TableToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, result = TCL_OK; for (numChars = 0; src < srcEnd; numChars++) { - if (dst > dstEnd) { - result = TCL_CONVERT_NOSPACE; - break; - } + if (dst > dstEnd) { + result = TCL_CONVERT_NOSPACE; + break; + } byte = *((unsigned char *) src); if (prefixBytes[byte]) { src++; @@ -2384,8 +2587,9 @@ TableToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, } else { dst += Tcl_UniCharToUtf(ch, dst); } - src++; + src++; } + *srcReadPtr = src - srcStart; *dstWrotePtr = dst - dstStart; *dstCharsPtr = numChars; @@ -2409,48 +2613,47 @@ TableToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, *------------------------------------------------------------------------- */ -static int -TableFromUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, - srcReadPtr, dstWrotePtr, dstCharsPtr) - ClientData clientData; /* TableEncodingData that specifies +static int +TableFromUtfProc( + ClientData clientData, /* TableEncodingData that specifies * encoding. */ - CONST char *src; /* Source string in UTF-8. */ - int srcLen; /* Source string length in bytes. */ - int flags; /* Conversion control flags. */ - Tcl_EncodingState *statePtr;/* Place for conversion routine to store - * state information used during a piecewise - * conversion. Contents of statePtr are + const char *src, /* Source string in UTF-8. */ + int srcLen, /* Source string length in bytes. */ + int flags, /* Conversion control flags. */ + Tcl_EncodingState *statePtr,/* Place for conversion routine to store state + * information used during a piecewise + * conversion. Contents of statePtr are * initialized and/or reset by conversion * routine under control of flags argument. */ - char *dst; /* Output buffer in which converted string - * is stored. */ - int dstLen; /* The maximum length of output buffer in + char *dst, /* Output buffer in which converted string is + * stored. */ + int dstLen, /* The maximum length of output buffer in * bytes. */ - int *srcReadPtr; /* Filled with the number of bytes from the - * source string that were converted. This - * may be less than the original source length - * if there was a problem converting some - * source characters. */ - int *dstWrotePtr; /* Filled with the number of bytes that were + int *srcReadPtr, /* Filled with the number of bytes from the + * source string that were converted. This may + * be less than the original source length if + * there was a problem converting some source + * characters. */ + int *dstWrotePtr, /* Filled with the number of bytes that were * stored in the output buffer as a result of * the conversion. */ - int *dstCharsPtr; /* Filled with the number of characters that + int *dstCharsPtr) /* Filled with the number of characters that * correspond to the bytes stored in the * output buffer. */ { - CONST char *srcStart, *srcEnd, *srcClose; + const char *srcStart, *srcEnd, *srcClose; char *dstStart, *dstEnd, *prefixBytes; Tcl_UniChar ch; int result, len, word, numChars; TableEncodingData *dataPtr; unsigned short **fromUnicode; - - result = TCL_OK; + + result = TCL_OK; dataPtr = (TableEncodingData *) clientData; prefixBytes = dataPtr->prefixBytes; fromUnicode = dataPtr->fromUnicode; - + srcStart = src; srcEnd = src + srcLen; srcClose = srcEnd; @@ -2475,9 +2678,10 @@ TableFromUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, #if TCL_UTF_MAX > 3 /* - * This prevents a crash condition. More evaluation is required - * for full support of int Tcl_UniChar. [Bug 1004065] + * This prevents a crash condition. More evaluation is required for + * full support of int Tcl_UniChar. [Bug 1004065] */ + if (ch & 0xffff0000) { word = 0; } else @@ -2489,7 +2693,7 @@ TableFromUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, result = TCL_CONVERT_UNKNOWN; break; } - word = dataPtr->fallback; + word = dataPtr->fallback; } if (prefixBytes[(word >> 8)] != 0) { if (dst + 1 > dstEnd) { @@ -2506,9 +2710,194 @@ TableFromUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, } dst[0] = (char) word; dst++; - } + } + src += len; + } + + *srcReadPtr = src - srcStart; + *dstWrotePtr = dst - dstStart; + *dstCharsPtr = numChars; + return result; +} + +/* + *------------------------------------------------------------------------- + * + * Iso88591ToUtfProc -- + * + * Convert from the "iso8859-1" encoding into UTF-8. + * + * Results: + * Returns TCL_OK if conversion was successful. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------- + */ + +static int +Iso88591ToUtfProc( + ClientData clientData, /* Ignored. */ + const char *src, /* Source string in specified encoding. */ + int srcLen, /* Source string length in bytes. */ + int flags, /* Conversion control flags. */ + Tcl_EncodingState *statePtr,/* Place for conversion routine to store state + * information used during a piecewise + * conversion. Contents of statePtr are + * initialized and/or reset by conversion + * routine under control of flags argument. */ + char *dst, /* Output buffer in which converted string is + * stored. */ + int dstLen, /* The maximum length of output buffer in + * bytes. */ + int *srcReadPtr, /* Filled with the number of bytes from the + * source string that were converted. This may + * be less than the original source length if + * there was a problem converting some source + * characters. */ + int *dstWrotePtr, /* Filled with the number of bytes that were + * stored in the output buffer as a result of + * the conversion. */ + int *dstCharsPtr) /* Filled with the number of characters that + * correspond to the bytes stored in the + * output buffer. */ +{ + const char *srcStart, *srcEnd; + char *dstEnd, *dstStart; + int result, numChars; + + srcStart = src; + srcEnd = src + srcLen; + + dstStart = dst; + dstEnd = dst + dstLen - TCL_UTF_MAX; + + result = TCL_OK; + for (numChars = 0; src < srcEnd; numChars++) { + Tcl_UniChar ch; + + if (dst > dstEnd) { + result = TCL_CONVERT_NOSPACE; + break; + } + ch = (Tcl_UniChar) *((unsigned char *) src); + /* + * Special case for 1-byte utf chars for speed. + */ + if (ch && ch < 0x80) { + *dst++ = (char) ch; + } else { + dst += Tcl_UniCharToUtf(ch, dst); + } + src++; + } + + *srcReadPtr = src - srcStart; + *dstWrotePtr = dst - dstStart; + *dstCharsPtr = numChars; + return result; +} + +/* + *------------------------------------------------------------------------- + * + * Iso88591FromUtfProc -- + * + * Convert from UTF-8 into the encoding "iso8859-1". + * + * Results: + * Returns TCL_OK if conversion was successful. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------- + */ + +static int +Iso88591FromUtfProc( + ClientData clientData, /* Ignored. */ + const char *src, /* Source string in UTF-8. */ + int srcLen, /* Source string length in bytes. */ + int flags, /* Conversion control flags. */ + Tcl_EncodingState *statePtr,/* Place for conversion routine to store state + * information used during a piecewise + * conversion. Contents of statePtr are + * initialized and/or reset by conversion + * routine under control of flags argument. */ + char *dst, /* Output buffer in which converted string is + * stored. */ + int dstLen, /* The maximum length of output buffer in + * bytes. */ + int *srcReadPtr, /* Filled with the number of bytes from the + * source string that were converted. This may + * be less than the original source length if + * there was a problem converting some source + * characters. */ + int *dstWrotePtr, /* Filled with the number of bytes that were + * stored in the output buffer as a result of + * the conversion. */ + int *dstCharsPtr) /* Filled with the number of characters that + * correspond to the bytes stored in the + * output buffer. */ +{ + const char *srcStart, *srcEnd, *srcClose; + char *dstStart, *dstEnd; + int result, numChars; + + result = TCL_OK; + + srcStart = src; + srcEnd = src + srcLen; + srcClose = srcEnd; + if ((flags & TCL_ENCODING_END) == 0) { + srcClose -= TCL_UTF_MAX; + } + + dstStart = dst; + dstEnd = dst + dstLen - 1; + + for (numChars = 0; src < srcEnd; numChars++) { + Tcl_UniChar ch; + int len; + + if ((src > srcClose) && (!Tcl_UtfCharComplete(src, srcEnd - src))) { + /* + * If there is more string to follow, this will ensure that the + * last UTF-8 character in the source buffer hasn't been cut off. + */ + + result = TCL_CONVERT_MULTIBYTE; + break; + } + len = TclUtfToUniChar(src, &ch); + + /* + * Check for illegal characters. + */ + + if (ch > 0xff) { + if (flags & TCL_ENCODING_STOPONERROR) { + result = TCL_CONVERT_UNKNOWN; + break; + } + + /* + * Plunge on, using '?' as a fallback character. + */ + + ch = (Tcl_UniChar) '?'; + } + + if (dst > dstEnd) { + result = TCL_CONVERT_NOSPACE; + break; + } + *(dst++) = (char) ch; src += len; } + *srcReadPtr = src - srcStart; *dstWrotePtr = dst - dstStart; *dstCharsPtr = numChars; @@ -2520,8 +2909,8 @@ TableFromUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, * * TableFreeProc -- * - * This procedure is invoked when an encoding is deleted. It deletes - * the memory used by the TableEncodingData. + * This function is invoked when an encoding is deleted. It deletes the + * memory used by the TableEncodingData. * * Results: * None. @@ -2533,14 +2922,14 @@ TableFromUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, */ static void -TableFreeProc(clientData) - ClientData clientData; /* TableEncodingData that specifies +TableFreeProc( + ClientData clientData) /* TableEncodingData that specifies * encoding. */ { TableEncodingData *dataPtr; /* - * Make sure we aren't freeing twice on shutdown. [Bug #219314] + * Make sure we aren't freeing twice on shutdown. [Bug 219314] */ dataPtr = (TableEncodingData *) clientData; @@ -2566,32 +2955,31 @@ TableFreeProc(clientData) *------------------------------------------------------------------------- */ -static int -EscapeToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, - srcReadPtr, dstWrotePtr, dstCharsPtr) - ClientData clientData; /* EscapeEncodingData that specifies +static int +EscapeToUtfProc( + ClientData clientData, /* EscapeEncodingData that specifies * encoding. */ - CONST char *src; /* Source string in specified encoding. */ - int srcLen; /* Source string length in bytes. */ - int flags; /* Conversion control flags. */ - Tcl_EncodingState *statePtr;/* Place for conversion routine to store - * state information used during a piecewise - * conversion. Contents of statePtr are + const char *src, /* Source string in specified encoding. */ + int srcLen, /* Source string length in bytes. */ + int flags, /* Conversion control flags. */ + Tcl_EncodingState *statePtr,/* Place for conversion routine to store state + * information used during a piecewise + * conversion. Contents of statePtr are * initialized and/or reset by conversion * routine under control of flags argument. */ - char *dst; /* Output buffer in which converted string - * is stored. */ - int dstLen; /* The maximum length of output buffer in + char *dst, /* Output buffer in which converted string is + * stored. */ + int dstLen, /* The maximum length of output buffer in * bytes. */ - int *srcReadPtr; /* Filled with the number of bytes from the - * source string that were converted. This - * may be less than the original source length - * if there was a problem converting some - * source characters. */ - int *dstWrotePtr; /* Filled with the number of bytes that were + int *srcReadPtr, /* Filled with the number of bytes from the + * source string that were converted. This may + * be less than the original source length if + * there was a problem converting some source + * characters. */ + int *dstWrotePtr, /* Filled with the number of bytes that were * stored in the output buffer as a result of * the conversion. */ - int *dstCharsPtr; /* Filled with the number of characters that + int *dstCharsPtr) /* Filled with the number of characters that * correspond to the bytes stored in the * output buffer. */ { @@ -2600,7 +2988,7 @@ EscapeToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, unsigned short **tableToUnicode; Encoding *encodingPtr; int state, result, numChars; - CONST char *srcStart, *srcEnd; + const char *srcStart, *srcEnd; char *dstStart, *dstEnd; result = TCL_OK; @@ -2618,7 +3006,7 @@ EscapeToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, dstStart = dst; dstEnd = dst + dstLen - TCL_UTF_MAX; - state = (int) *statePtr; + state = PTR2INT(*statePtr); if (flags & TCL_ENCODING_START) { state = 0; } @@ -2626,54 +3014,56 @@ EscapeToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, for (numChars = 0; src < srcEnd; ) { int byte, hi, lo, ch; - if (dst > dstEnd) { - result = TCL_CONVERT_NOSPACE; - break; - } + if (dst > dstEnd) { + result = TCL_CONVERT_NOSPACE; + break; + } byte = *((unsigned char *) src); if (prefixBytes[byte]) { unsigned int left, len, longest; int checked, i; EscapeSubTable *subTablePtr; - + /* - * Saw the beginning of an escape sequence. + * Saw the beginning of an escape sequence. */ - + left = srcEnd - src; len = dataPtr->initLen; longest = len; checked = 0; + if (len <= left) { checked++; - if ((len > 0) && - (memcmp(src, dataPtr->init, len) == 0)) { + if ((len > 0) && (memcmp(src, dataPtr->init, len) == 0)) { /* * If we see initialization string, skip it, even if we're - * not at the beginning of the buffer. + * not at the beginning of the buffer. */ - + src += len; continue; } } + len = dataPtr->finalLen; if (len > longest) { longest = len; } + if (len <= left) { checked++; - if ((len > 0) && - (memcmp(src, dataPtr->final, len) == 0)) { + if ((len > 0) && (memcmp(src, dataPtr->final, len) == 0)) { /* * If we see finalization string, skip it, even if we're - * not at the end of the buffer. + * not at the end of the buffer. */ - + src += len; continue; } } + subTablePtr = dataPtr->subTables; for (i = 0; i < dataPtr->numSubTables; i++) { len = subTablePtr->sequenceLen; @@ -2682,7 +3072,7 @@ EscapeToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, } if (len <= left) { checked++; - if ((len > 0) && + if ((len > 0) && (memcmp(src, subTablePtr->sequence, len) == 0)) { state = i; encodingPtr = NULL; @@ -2693,6 +3083,7 @@ EscapeToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, } subTablePtr++; } + if (subTablePtr == NULL) { /* * A match was found, the escape sequence was consumed, and @@ -2703,9 +3094,9 @@ EscapeToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, } /* - * We have a split-up or unrecognized escape sequence. If we - * checked all the sequences, then it's a syntax error, - * otherwise we need more bytes to determine a match. + * We have a split-up or unrecognized escape sequence. If we + * checked all the sequences, then it's a syntax error, otherwise + * we need more bytes to determine a match. */ if ((checked == dataPtr->numSubTables + 2) @@ -2733,6 +3124,7 @@ EscapeToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, tablePrefixBytes = tableDataPtr->prefixBytes; tableToUnicode = tableDataPtr->toUnicode; } + if (tablePrefixBytes[byte]) { src++; if (src >= srcEnd) { @@ -2746,13 +3138,14 @@ EscapeToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, hi = 0; lo = byte; } + ch = tableToUnicode[hi][lo]; dst += Tcl_UniCharToUtf(ch, dst); src++; numChars++; } - *statePtr = (Tcl_EncodingState) state; + *statePtr = (Tcl_EncodingState) INT2PTR(state); *srcReadPtr = src - srcStart; *dstWrotePtr = dst - dstStart; *dstCharsPtr = numChars; @@ -2776,45 +3169,44 @@ EscapeToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, *------------------------------------------------------------------------- */ -static int -EscapeFromUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, - srcReadPtr, dstWrotePtr, dstCharsPtr) - ClientData clientData; /* EscapeEncodingData that specifies +static int +EscapeFromUtfProc( + ClientData clientData, /* EscapeEncodingData that specifies * encoding. */ - CONST char *src; /* Source string in UTF-8. */ - int srcLen; /* Source string length in bytes. */ - int flags; /* Conversion control flags. */ - Tcl_EncodingState *statePtr;/* Place for conversion routine to store - * state information used during a piecewise - * conversion. Contents of statePtr are + const char *src, /* Source string in UTF-8. */ + int srcLen, /* Source string length in bytes. */ + int flags, /* Conversion control flags. */ + Tcl_EncodingState *statePtr,/* Place for conversion routine to store state + * information used during a piecewise + * conversion. Contents of statePtr are * initialized and/or reset by conversion * routine under control of flags argument. */ - char *dst; /* Output buffer in which converted string - * is stored. */ - int dstLen; /* The maximum length of output buffer in + char *dst, /* Output buffer in which converted string is + * stored. */ + int dstLen, /* The maximum length of output buffer in * bytes. */ - int *srcReadPtr; /* Filled with the number of bytes from the - * source string that were converted. This - * may be less than the original source length - * if there was a problem converting some - * source characters. */ - int *dstWrotePtr; /* Filled with the number of bytes that were + int *srcReadPtr, /* Filled with the number of bytes from the + * source string that were converted. This may + * be less than the original source length if + * there was a problem converting some source + * characters. */ + int *dstWrotePtr, /* Filled with the number of bytes that were * stored in the output buffer as a result of * the conversion. */ - int *dstCharsPtr; /* Filled with the number of characters that + int *dstCharsPtr) /* Filled with the number of characters that * correspond to the bytes stored in the * output buffer. */ { EscapeEncodingData *dataPtr; Encoding *encodingPtr; - CONST char *srcStart, *srcEnd, *srcClose; + const char *srcStart, *srcEnd, *srcClose; char *dstStart, *dstEnd; int state, result, numChars; TableEncodingData *tableDataPtr; char *tablePrefixBytes; unsigned short **tableFromUnicode; - - result = TCL_OK; + + result = TCL_OK; dataPtr = (EscapeEncodingData *) clientData; @@ -2830,7 +3222,7 @@ EscapeFromUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, /* * RFC1468 states that the text starts in ASCII, and switches to Japanese - * characters, and that the text must end in ASCII. [Patch #474358] + * characters, and that the text must end in ASCII. [Patch 474358] */ if (flags & TCL_ENCODING_START) { @@ -2840,11 +3232,10 @@ EscapeFromUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, *dstWrotePtr = 0; return TCL_CONVERT_NOSPACE; } - memcpy((VOID *) dst, (VOID *) dataPtr->init, - (size_t) dataPtr->initLen); + memcpy(dst, dataPtr->init, (size_t)dataPtr->initLen); dst += dataPtr->initLen; } else { - state = (int) *statePtr; + state = PTR2INT(*statePtr); } encodingPtr = GetTableEncoding(dataPtr, state); @@ -2856,7 +3247,7 @@ EscapeFromUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, unsigned int len; int word; Tcl_UniChar ch; - + if ((src > srcClose) && (!Tcl_UtfCharComplete(src, srcEnd - src))) { /* * If there is more string to follow, this will ensure that the @@ -2872,7 +3263,7 @@ EscapeFromUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, if ((word == 0) && (ch != 0)) { int oldState; EscapeSubTable *subTablePtr; - + oldState = state; for (state = 0; state < dataPtr->numSubTables; state++) { encodingPtr = GetTableEncoding(dataPtr, state); @@ -2892,16 +3283,17 @@ EscapeFromUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, encodingPtr = GetTableEncoding(dataPtr, state); tableDataPtr = (TableEncodingData *) encodingPtr->clientData; word = tableDataPtr->fallback; - } - + } + tablePrefixBytes = tableDataPtr->prefixBytes; tableFromUnicode = tableDataPtr->fromUnicode; /* * The state variable has the value of oldState when word is 0. - * In this case, the escape sequense should not be copied to dst + * In this case, the escape sequense should not be copied to dst * because the current character set is not changed. */ + if (state != oldState) { subTablePtr = &dataPtr->subTables[state]; if ((dst + subTablePtr->sequenceLen) > dstEnd) { @@ -2911,11 +3303,12 @@ EscapeFromUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, * variable because this escape sequence must be written * in the next conversion. */ + state = oldState; result = TCL_CONVERT_NOSPACE; break; } - memcpy((VOID *) dst, (VOID *) subTablePtr->sequence, + memcpy(dst, subTablePtr->sequence, (size_t) subTablePtr->sequenceLen); dst += subTablePtr->sequenceLen; } @@ -2936,14 +3329,13 @@ EscapeFromUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, } dst[0] = (char) word; dst++; - } + } src += len; } if ((result == TCL_OK) && (flags & TCL_ENCODING_END)) { unsigned int len = dataPtr->subTables[0].sequenceLen; /* - * [Bug 1516109]. * Certain encodings like iso2022-jp need to write * an escape sequence after all characters have * been converted. This logic checks that enough @@ -2957,18 +3349,16 @@ EscapeFromUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, result = TCL_CONVERT_NOSPACE; } else { if (state) { - memcpy((VOID *) dst, (VOID *) dataPtr->subTables[0].sequence, - (size_t) len); + memcpy(dst, dataPtr->subTables[0].sequence, (size_t) len); dst += len; } - memcpy((VOID *) dst, (VOID *) dataPtr->final, - (size_t) dataPtr->finalLen); + memcpy(dst, dataPtr->final, (size_t) dataPtr->finalLen); dst += dataPtr->finalLen; state &= ~TCL_ENCODING_END; } } - *statePtr = (Tcl_EncodingState) state; + *statePtr = (Tcl_EncodingState) INT2PTR(state); *srcReadPtr = src - srcStart; *dstWrotePtr = dst - dstStart; *dstCharsPtr = numChars; @@ -2980,8 +3370,8 @@ EscapeFromUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, * * EscapeFreeProc -- * - * This procedure is invoked when an EscapeEncodingData encoding is - * deleted. It deletes the memory used by the encoding. + * This function is invoked when an EscapeEncodingData encoding is + * deleted. It deletes the memory used by the encoding. * * Results: * None. @@ -2993,8 +3383,9 @@ EscapeFromUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, */ static void -EscapeFreeProc(clientData) - ClientData clientData; /* EscapeEncodingData that specifies encoding. */ +EscapeFreeProc( + ClientData clientData) /* EscapeEncodingData that specifies + * encoding. */ { EscapeEncodingData *dataPtr; EscapeSubTable *subTablePtr; @@ -3030,7 +3421,7 @@ EscapeFreeProc(clientData) * * GetTableEncoding -- * - * Helper function for the EscapeEncodingData conversions. Gets the + * Helper function for the EscapeEncodingData conversions. Gets the * encoding (of type TextEncodingData) that represents the specified * state. * @@ -3038,36 +3429,34 @@ EscapeFreeProc(clientData) * The return value is the encoding. * * Side effects: - * If the encoding that represents the specified state has not - * already been used by this EscapeEncoding, it will be loaded - * and cached in the dataPtr. + * If the encoding that represents the specified state has not already + * been used by this EscapeEncoding, it will be loaded and cached in the + * dataPtr. * *--------------------------------------------------------------------------- */ static Encoding * -GetTableEncoding(dataPtr, state) - EscapeEncodingData *dataPtr;/* Contains names of encodings. */ - int state; /* Index in dataPtr of desired Encoding. */ +GetTableEncoding( + EscapeEncodingData *dataPtr,/* Contains names of encodings. */ + int state) /* Index in dataPtr of desired Encoding. */ { EscapeSubTable *subTablePtr; Encoding *encodingPtr; - + subTablePtr = &dataPtr->subTables[state]; encodingPtr = subTablePtr->encodingPtr; + if (encodingPtr == NULL) { - /* - * Now that escape encodings load their sub-encodings first, and - * fail to load if any sub-encodings are missing, this branch should - * never happen. - */ encodingPtr = (Encoding *) Tcl_GetEncoding(NULL, subTablePtr->name); - if ((encodingPtr == NULL) - || (encodingPtr->toUtfProc != TableToUtfProc)) { - panic("EscapeToUtfProc: invalid sub table"); + if ((encodingPtr == NULL) + || (encodingPtr->toUtfProc != TableToUtfProc + && encodingPtr->toUtfProc != Iso88591ToUtfProc)) { + Tcl_Panic("EscapeToUtfProc: invalid sub table"); } subTablePtr->encodingPtr = encodingPtr; } + return encodingPtr; } @@ -3076,9 +3465,9 @@ GetTableEncoding(dataPtr, state) * * unilen -- * - * A helper function for the Tcl_ExternalToUtf functions. This - * function is similar to strlen for double-byte characters: it - * returns the number of bytes in a 0x0000 terminated string. + * A helper function for the Tcl_ExternalToUtf functions. This function + * is similar to strlen for double-byte characters: it returns the number + * of bytes in a 0x0000 terminated string. * * Results: * As above. @@ -3090,8 +3479,8 @@ GetTableEncoding(dataPtr, state) */ static size_t -unilen(src) - CONST char *src; +unilen( + const char *src) { unsigned short *p; @@ -3105,83 +3494,76 @@ unilen(src) /* *------------------------------------------------------------------------- * - * TclFindEncodings -- + * InitializeEncodingSearchPath -- * - * Find and load the encoding file for this operating system. - * Before this is called, Tcl makes assumptions about the - * native string representation, but the true encoding is not - * assured. + * This is the fallback routine that sets the default value of the + * encoding search path if the application has not set one via a call to + * Tcl_SetEncodingSearchPath() by the first time the search path is needed + * to load encoding data. + * + * The default encoding search path is produced by taking each directory + * in the library path, appending a subdirectory named "encoding", and if + * the resulting directory exists, adding it to the encoding search path. * * Results: - * Return result of TclpInitLibraryPath, which reports whether the - * path is clean (0) or dirty (1) UTF. + * None. * * Side effects: - * Varied, see the respective initialization routines. + * Sets the encoding search path to an initial value. * *------------------------------------------------------------------------- */ -static int -TclFindEncodings(argv0) - CONST char *argv0; /* Name of executable from argv[0] to main() - * in native multi-byte encoding. */ +static void +InitializeEncodingSearchPath( + char **valuePtr, + int *lengthPtr, + Tcl_Encoding *encodingPtr) { - int mustCleanUtf = 0; - - if (encodingsInitialized == 0) { - /* - * Double check inside the mutex. There may be calls - * back into this routine from some of the procedures below. - */ - - TclpInitLock(); - if (encodingsInitialized == 0) { - char *native; - Tcl_Obj *pathPtr; - Tcl_DString libPath, buffer; - - /* - * Have to set this bit here to avoid deadlock with the - * routines below us that call into TclInitSubsystems. - */ - - encodingsInitialized = 1; - - native = TclpFindExecutable(argv0); - mustCleanUtf = TclpInitLibraryPath(native); - - /* - * The library path was set in the TclpInitLibraryPath routine. - * The string set is a dirty UTF string. To preserve the value - * convert the UTF string back to native before setting the new - * default encoding. - */ - - pathPtr = TclGetLibraryPath(); - if ((pathPtr != NULL) && mustCleanUtf) { - Tcl_UtfToExternalDString(NULL, Tcl_GetString(pathPtr), -1, - &libPath); - } - - TclpSetInitialEncodings(); - - /* - * Now convert the native string back to UTF. - */ - - if ((pathPtr != NULL) && mustCleanUtf) { - Tcl_ExternalToUtfDString(NULL, Tcl_DStringValue(&libPath), -1, - &buffer); - pathPtr = Tcl_NewStringObj(Tcl_DStringValue(&buffer), -1); - TclSetLibraryPath(pathPtr); - - Tcl_DStringFree(&libPath); - Tcl_DStringFree(&buffer); - } + char *bytes; + int i, numDirs, numBytes; + Tcl_Obj *libPath, *encodingObj, *searchPath; + + TclNewLiteralStringObj(encodingObj, "encoding"); + TclNewObj(searchPath); + Tcl_IncrRefCount(encodingObj); + Tcl_IncrRefCount(searchPath); + libPath = TclGetLibraryPath(); + Tcl_IncrRefCount(libPath); + Tcl_ListObjLength(NULL, libPath, &numDirs); + + for (i = 0; i < numDirs; i++) { + Tcl_Obj *directory, *path; + Tcl_StatBuf stat; + + Tcl_ListObjIndex(NULL, libPath, i, &directory); + path = Tcl_FSJoinToPath(directory, 1, &encodingObj); + Tcl_IncrRefCount(path); + if ((0 == Tcl_FSStat(path, &stat)) && S_ISDIR(stat.st_mode)) { + Tcl_ListObjAppendElement(NULL, searchPath, path); } - TclpInitUnlock(); + Tcl_DecrRefCount(path); + } + + Tcl_DecrRefCount(libPath); + Tcl_DecrRefCount(encodingObj); + *encodingPtr = libraryPath.encoding; + if (*encodingPtr) { + ((Encoding *)(*encodingPtr))->refCount++; } + bytes = Tcl_GetStringFromObj(searchPath, &numBytes); - return mustCleanUtf; + *lengthPtr = numBytes; + *valuePtr = ckalloc((unsigned int) numBytes + 1); + memcpy(*valuePtr, bytes, (size_t) numBytes + 1); + Tcl_DecrRefCount(searchPath); } + +/* + * Local Variables: + * mode: c + * c-basic-offset: 4 + * fill-column: 78 + * End: + */ + |