diff options
Diffstat (limited to 'generic/tclEncoding.c')
-rw-r--r-- | generic/tclEncoding.c | 1653 |
1 files changed, 973 insertions, 680 deletions
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 8d2d1fd..d246cb2 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -5,15 +5,13 @@ * * Copyright (c) 1996-1998 Sun Microsystems, Inc. * - * See the file "license.terms" for information on usage and redistribution - * of this file, and for a DISCLAIMER OF ALL WARRANTIES. - * - * RCS: @(#) $Id: tclEncoding.c,v 1.37 2005/09/02 19:23:46 andreas_kupries Exp $ + * See the file "license.terms" for information on usage and redistribution of + * this file, and for a DISCLAIMER OF ALL WARRANTIES. */ #include "tclInt.h" -typedef size_t (LengthProc)_ANSI_ARGS_((CONST char *src)); +typedef size_t (LengthProc)(const char *src); /* * The following data structure represents an encoding, which describes how to @@ -21,28 +19,27 @@ typedef size_t (LengthProc)_ANSI_ARGS_((CONST char *src)); */ typedef struct Encoding { - char *name; /* Name of encoding. Malloced because (1) - * hash table entry that owns this encoding - * may be freed prior to this encoding being - * freed, (2) string passed in the - * Tcl_EncodingType structure may not be - * persistent. */ + char *name; /* Name of encoding. Malloced because (1) hash + * table entry that owns this encoding may be + * freed prior to this encoding being freed, + * (2) string passed in the Tcl_EncodingType + * structure may not be persistent. */ Tcl_EncodingConvertProc *toUtfProc; - /* Procedure to convert from external encoding + /* Function to convert from external encoding * into UTF-8. */ Tcl_EncodingConvertProc *fromUtfProc; - /* Procedure to convert from UTF-8 into + /* Function to convert from UTF-8 into * external encoding. */ Tcl_EncodingFreeProc *freeProc; - /* If non-NULL, procedure to call when this + /* If non-NULL, function to call when this * encoding is deleted. */ int nullSize; /* Number of 0x00 bytes that signify - * end-of-string in this encoding. 
This - * number is used to determine the source - * string length when the srcLen argument is - * negative. This number can be 1 or 2. */ + * end-of-string in this encoding. This number + * is used to determine the source string + * length when the srcLen argument is + * negative. This number can be 1 or 2. */ ClientData clientData; /* Arbitrary value associated with encoding - * type. Passed to conversion procedures. */ + * type. Passed to conversion functions. */ LengthProc *lengthProc; /* Function to compute length of * null-terminated strings in this encoding. * If nullSize is 1, this is strlen; if @@ -55,7 +52,7 @@ typedef struct Encoding { /* * The following structure is the clientData for a dynamically-loaded, - * table-driven encoding created by LoadTableEncoding(). It maps between + * table-driven encoding created by LoadTableEncoding(). It maps between * Unicode and a single-byte, double-byte, or multibyte (1 or 2 bytes only) * encoding. */ @@ -71,7 +68,7 @@ typedef struct TableEncodingData { unsigned short **toUnicode; /* Two dimensional sparse matrix to map * characters from the encoding to Unicode. * Each element of the toUnicode array points - * to an array of 256 shorts. If there is no + * to an array of 256 shorts. If there is no * corresponding character in Unicode, the * value in the matrix is 0x0000. * malloc'd. */ @@ -79,7 +76,7 @@ typedef struct TableEncodingData { /* Two dimensional sparse matrix to map * characters from Unicode to the encoding. * Each element of the fromUnicode array - * points to an array of 256 shorts. If there + * points to an array of 256 shorts. If there * is no corresponding character the encoding, * the value in the matrix is 0x0000. * malloc'd. */ @@ -89,13 +86,13 @@ typedef struct TableEncodingData { * The following structures is the clientData for a dynamically-loaded, * escape-driven encoding that is itself comprised of other simpler encodings. 
* An example is "iso-2022-jp", which uses escape sequences to switch between - * ascii, jis0208, jis0212, gb2312, and ksc5601. Note that "escape-driven" + * ascii, jis0208, jis0212, gb2312, and ksc5601. Note that "escape-driven" * does not necessarily mean that the ESCAPE character is the character used * for switching character sets. */ typedef struct EscapeSubTable { - unsigned int sequenceLen; /* Length of following string. */ + unsigned sequenceLen; /* Length of following string. */ char sequence[16]; /* Escape code that marks this encoding. */ char name[32]; /* Name for encoding. */ Encoding *encodingPtr; /* Encoding loaded using above name, or NULL @@ -107,10 +104,10 @@ typedef struct EscapeEncodingData { int fallback; /* Character (in this encoding) to substitute * when this encoding cannot represent a UTF-8 * character. */ - unsigned int initLen; /* Length of following string. */ + unsigned initLen; /* Length of following string. */ char init[16]; /* String to emit or expect before first char * in conversion. */ - unsigned int finalLen; /* Length of following string. */ + unsigned finalLen; /* Length of following string. */ char final[16]; /* String to emit or expect after last char in * conversion. */ char prefixBytes[256]; /* If a byte in the input stream is the first @@ -120,8 +117,8 @@ typedef struct EscapeEncodingData { * 0. */ int numSubTables; /* Length of following array. */ EscapeSubTable subTables[1];/* Information about each EscapeSubTable used - * by this encoding type. The actual size - * will be as large as necessary to hold all + * by this encoding type. The actual size will + * be as large as necessary to hold all * EscapeSubTables. */ } EscapeEncodingData; @@ -136,8 +133,8 @@ typedef struct EscapeEncodingData { #define ENCODING_ESCAPE 3 /* - * A list of directories in which Tcl should look for *.enc files. This list - * is shared by all threads. Access is governed by a mutex lock. 
+ * A list of directories in which Tcl should look for *.enc files. This list + * is shared by all threads. Access is governed by a mutex lock. */ static TclInitProcessGlobalValueProc InitializeEncodingSearchPath; @@ -147,8 +144,8 @@ static ProcessGlobalValue encodingSearchPath = { /* * A map from encoding names to the directories in which their data files have - * been seen. The string value of the map is shared by all threads. Access - * to the shared string is governed by a mutex lock. + * been seen. The string value of the map is shared by all threads. Access to + * the shared string is governed by a mutex lock. */ static ProcessGlobalValue encodingFileMap = { @@ -156,9 +153,9 @@ static ProcessGlobalValue encodingFileMap = { }; /* - * A list of directories making up the "library path". Historically this + * A list of directories making up the "library path". Historically this * search path has served many uses, but the only one remaining is a base for - * the encodingSearchPath above. If the application does not explicitly set + * the encodingSearchPath above. If the application does not explicitly set * the encodingSearchPath, then it will be initialized by appending /encoding * to each directory in this "libraryPath". */ @@ -167,10 +164,10 @@ static ProcessGlobalValue libraryPath = { 0, 0, NULL, NULL, TclpInitLibraryPath, NULL, NULL }; -static int encodingsInitialized = 0; +static int encodingsInitialized = 0; /* - * Hash table that keeps track of all loaded Encodings. Keys are the string + * Hash table that keeps track of all loaded Encodings. Keys are the string * names that represent the encoding, values are (Encoding *). 
*/ @@ -185,6 +182,7 @@ TCL_DECLARE_MUTEX(encodingMutex) static Tcl_Encoding defaultEncoding; static Tcl_Encoding systemEncoding; +Tcl_Encoding tclIdentityEncoding; /* * The following variable is used in the sparse matrix code for a @@ -194,123 +192,130 @@ static Tcl_Encoding systemEncoding; static unsigned short emptyPage[256]; /* - * Procedures used only in this module. + * Functions used only in this module. */ -static int BinaryProc _ANSI_ARGS_((ClientData clientData, - CONST char *src, int srcLen, int flags, +static int BinaryProc(ClientData clientData, + const char *src, int srcLen, int flags, Tcl_EncodingState *statePtr, char *dst, int dstLen, int *srcReadPtr, int *dstWrotePtr, - int *dstCharsPtr)); -static void DupEncodingIntRep _ANSI_ARGS_((Tcl_Obj *srcPtr, - Tcl_Obj *dupPtr)); -static void EscapeFreeProc _ANSI_ARGS_((ClientData clientData)); -static int EscapeFromUtfProc _ANSI_ARGS_((ClientData clientData, - CONST char *src, int srcLen, int flags, + int *dstCharsPtr); +static void DupEncodingIntRep(Tcl_Obj *srcPtr, Tcl_Obj *dupPtr); +static void EscapeFreeProc(ClientData clientData); +static int EscapeFromUtfProc(ClientData clientData, + const char *src, int srcLen, int flags, Tcl_EncodingState *statePtr, char *dst, int dstLen, int *srcReadPtr, int *dstWrotePtr, - int *dstCharsPtr)); -static int EscapeToUtfProc _ANSI_ARGS_((ClientData clientData, - CONST char *src, int srcLen, int flags, + int *dstCharsPtr); +static int EscapeToUtfProc(ClientData clientData, + const char *src, int srcLen, int flags, Tcl_EncodingState *statePtr, char *dst, int dstLen, int *srcReadPtr, int *dstWrotePtr, - int *dstCharsPtr)); -static void FillEncodingFileMap (); -static void FreeEncoding _ANSI_ARGS_((Tcl_Encoding encoding)); -static void FreeEncodingIntRep _ANSI_ARGS_((Tcl_Obj *objPtr)); -static Encoding * GetTableEncoding _ANSI_ARGS_(( - EscapeEncodingData *dataPtr, int state)); -static Tcl_Encoding LoadEncodingFile _ANSI_ARGS_((Tcl_Interp *interp, - CONST char *name)); 
-static Tcl_Encoding LoadTableEncoding _ANSI_ARGS_((CONST char *name, - int type, Tcl_Channel chan)); -static Tcl_Encoding LoadEscapeEncoding _ANSI_ARGS_((CONST char *name, - Tcl_Channel chan)); -static Tcl_Channel OpenEncodingFileChannel _ANSI_ARGS_(( - Tcl_Interp *interp, CONST char *name)); -static void TableFreeProc _ANSI_ARGS_((ClientData clientData)); -static int TableFromUtfProc _ANSI_ARGS_((ClientData clientData, - CONST char *src, int srcLen, int flags, + int *dstCharsPtr); +static void FillEncodingFileMap(void); +static void FreeEncoding(Tcl_Encoding encoding); +static void FreeEncodingIntRep(Tcl_Obj *objPtr); +static Encoding * GetTableEncoding(EscapeEncodingData *dataPtr, + int state); +static Tcl_Encoding LoadEncodingFile(Tcl_Interp *interp, const char *name); +static Tcl_Encoding LoadTableEncoding(const char *name, int type, + Tcl_Channel chan); +static Tcl_Encoding LoadEscapeEncoding(const char *name, Tcl_Channel chan); +static Tcl_Channel OpenEncodingFileChannel(Tcl_Interp *interp, + const char *name); +static void TableFreeProc(ClientData clientData); +static int TableFromUtfProc(ClientData clientData, + const char *src, int srcLen, int flags, Tcl_EncodingState *statePtr, char *dst, int dstLen, int *srcReadPtr, int *dstWrotePtr, - int *dstCharsPtr)); -static int TableToUtfProc _ANSI_ARGS_((ClientData clientData, - CONST char *src, int srcLen, int flags, + int *dstCharsPtr); +static int TableToUtfProc(ClientData clientData, const char *src, + int srcLen, int flags, Tcl_EncodingState *statePtr, + char *dst, int dstLen, int *srcReadPtr, + int *dstWrotePtr, int *dstCharsPtr); +static size_t unilen(const char *src); +static int UnicodeToUtfProc(ClientData clientData, + const char *src, int srcLen, int flags, Tcl_EncodingState *statePtr, char *dst, int dstLen, int *srcReadPtr, int *dstWrotePtr, - int *dstCharsPtr)); -static size_t unilen _ANSI_ARGS_((CONST char *src)); -static int UnicodeToUtfProc _ANSI_ARGS_((ClientData clientData, - CONST char *src, 
int srcLen, int flags, + int *dstCharsPtr); +static int UtfToUnicodeProc(ClientData clientData, + const char *src, int srcLen, int flags, Tcl_EncodingState *statePtr, char *dst, int dstLen, int *srcReadPtr, int *dstWrotePtr, - int *dstCharsPtr)); -static int UtfToUnicodeProc _ANSI_ARGS_((ClientData clientData, - CONST char *src, int srcLen, int flags, + int *dstCharsPtr); +static int UtfToUtfProc(ClientData clientData, + const char *src, int srcLen, int flags, Tcl_EncodingState *statePtr, char *dst, int dstLen, int *srcReadPtr, int *dstWrotePtr, - int *dstCharsPtr)); -static int UtfToUtfProc _ANSI_ARGS_((ClientData clientData, - CONST char *src, int srcLen, int flags, + int *dstCharsPtr, int pureNullMode); +static int UtfIntToUtfExtProc(ClientData clientData, + const char *src, int srcLen, int flags, Tcl_EncodingState *statePtr, char *dst, int dstLen, int *srcReadPtr, int *dstWrotePtr, - int *dstCharsPtr, int pureNullMode)); -static int UtfIntToUtfExtProc _ANSI_ARGS_((ClientData clientData, - CONST char *src, int srcLen, int flags, + int *dstCharsPtr); +static int UtfExtToUtfIntProc(ClientData clientData, + const char *src, int srcLen, int flags, Tcl_EncodingState *statePtr, char *dst, int dstLen, int *srcReadPtr, int *dstWrotePtr, - int *dstCharsPtr)); -static int UtfExtToUtfIntProc _ANSI_ARGS_((ClientData clientData, - CONST char *src, int srcLen, int flags, + int *dstCharsPtr); +static int Iso88591FromUtfProc(ClientData clientData, + const char *src, int srcLen, int flags, Tcl_EncodingState *statePtr, char *dst, int dstLen, int *srcReadPtr, int *dstWrotePtr, - int *dstCharsPtr)); + int *dstCharsPtr); +static int Iso88591ToUtfProc(ClientData clientData, + const char *src, int srcLen, int flags, + Tcl_EncodingState *statePtr, char *dst, + int dstLen, int *srcReadPtr, int *dstWrotePtr, + int *dstCharsPtr); /* - * A Tcl_ObjType for holding a cached Tcl_Encoding as the intrep. This should - * help the lifetime of encodings be more useful. 
See concerns raised in [Bug - * 1077262]. + * A Tcl_ObjType for holding a cached Tcl_Encoding in the twoPtrValue.ptr1 field + * of the intrep. This should help the lifetime of encodings be more useful. + * See concerns raised in [Bug 1077262]. */ -static Tcl_ObjType EncodingType = { +static const Tcl_ObjType encodingType = { "encoding", FreeEncodingIntRep, DupEncodingIntRep, NULL, NULL }; /* *---------------------------------------------------------------------- * - * TclGetEncodingFromObj -- + * Tcl_GetEncodingFromObj -- * * Writes to (*encodingPtr) the Tcl_Encoding value of (*objPtr), if - * possible, and returns TCL_OK. If no such encoding exists, TCL_ERROR - * is returned, and if interp is non-NULL, an error message is written + * possible, and returns TCL_OK. If no such encoding exists, TCL_ERROR is + * returned, and if interp is non-NULL, an error message is written * there. * * Results: * Standard Tcl return code. * * Side effects: - * Caches the Tcl_Encoding value as the internal rep of (*objPtr). + * Caches the Tcl_Encoding value as the internal rep of (*objPtr). 
* *---------------------------------------------------------------------- */ int -TclGetEncodingFromObj(interp, objPtr, encodingPtr) - Tcl_Interp *interp; - Tcl_Obj *objPtr; - Tcl_Encoding *encodingPtr; +Tcl_GetEncodingFromObj( + Tcl_Interp *interp, + Tcl_Obj *objPtr, + Tcl_Encoding *encodingPtr) { - CONST char *name = Tcl_GetString(objPtr); - if (objPtr->typePtr != &EncodingType) { + const char *name = Tcl_GetString(objPtr); + + if (objPtr->typePtr != &encodingType) { Tcl_Encoding encoding = Tcl_GetEncoding(interp, name); if (encoding == NULL) { return TCL_ERROR; } TclFreeIntRep(objPtr); - objPtr->internalRep.otherValuePtr = (VOID *) encoding; - objPtr->typePtr = &EncodingType; + objPtr->internalRep.twoPtrValue.ptr1 = encoding; + objPtr->typePtr = &encodingType; } *encodingPtr = Tcl_GetEncoding(NULL, name); return TCL_OK; @@ -327,10 +332,11 @@ TclGetEncodingFromObj(interp, objPtr, encodingPtr) */ static void -FreeEncodingIntRep(objPtr) - Tcl_Obj *objPtr; +FreeEncodingIntRep( + Tcl_Obj *objPtr) { - Tcl_FreeEncoding((Tcl_Encoding) objPtr->internalRep.otherValuePtr); + Tcl_FreeEncoding(objPtr->internalRep.twoPtrValue.ptr1); + objPtr->typePtr = NULL; } /* @@ -344,18 +350,17 @@ FreeEncodingIntRep(objPtr) */ static void -DupEncodingIntRep(srcPtr, dupPtr) - Tcl_Obj *srcPtr; - Tcl_Obj *dupPtr; +DupEncodingIntRep( + Tcl_Obj *srcPtr, + Tcl_Obj *dupPtr) { - dupPtr->internalRep.otherValuePtr = (VOID *) - Tcl_GetEncoding(NULL, srcPtr->bytes); + dupPtr->internalRep.twoPtrValue.ptr1 = Tcl_GetEncoding(NULL, srcPtr->bytes); } /* *---------------------------------------------------------------------- * - * TclGetEncodingSearchPath -- + * Tcl_GetEncodingSearchPath -- * * Keeps the per-thread copy of the encoding search path current with * changes to the global copy. 
@@ -367,14 +372,15 @@ DupEncodingIntRep(srcPtr, dupPtr) */ Tcl_Obj * -TclGetEncodingSearchPath() { +Tcl_GetEncodingSearchPath(void) +{ return TclGetProcessGlobalValue(&encodingSearchPath); } /* *---------------------------------------------------------------------- * - * TclSetEncodingSearchPath -- + * Tcl_SetEncodingSearchPath -- * * Keeps the per-thread copy of the encoding search path current with * changes to the global copy. @@ -383,8 +389,8 @@ TclGetEncodingSearchPath() { */ int -TclSetEncodingSearchPath(searchPath) - Tcl_Obj *searchPath; +Tcl_SetEncodingSearchPath( + Tcl_Obj *searchPath) { int dummy; @@ -410,7 +416,8 @@ TclSetEncodingSearchPath(searchPath) */ Tcl_Obj * -TclGetLibraryPath() { +TclGetLibraryPath(void) +{ return TclGetProcessGlobalValue(&libraryPath); } @@ -423,15 +430,15 @@ TclGetLibraryPath() { * the global copy. * * NOTE: this routine returns void, so there's no way to report the error - * that searchPath is not a valid list. In that case, this routine will + * that searchPath is not a valid list. In that case, this routine will * silently do nothing. * *---------------------------------------------------------------------- */ void -TclSetLibraryPath(path) - Tcl_Obj *path; +TclSetLibraryPath( + Tcl_Obj *path) { int dummy; @@ -446,8 +453,8 @@ TclSetLibraryPath(path) * * FillEncodingFileMap -- * - * Called to bring the encoding file map in sync with the current value - * of the encoding search path. + * Called to bring the encoding file map in sync with the current value + * of the encoding search path. 
* * Scan the directories on the encoding search path, find the *.enc * files, and store the found pathnames in a map associated with the @@ -468,12 +475,12 @@ TclSetLibraryPath(path) */ static void -FillEncodingFileMap() +FillEncodingFileMap(void) { int i, numDirs = 0; Tcl_Obj *map, *searchPath; - searchPath = TclGetEncodingSearchPath(); + searchPath = Tcl_GetEncodingSearchPath(); Tcl_IncrRefCount(searchPath); Tcl_ListObjLength(NULL, searchPath, &numDirs); map = Tcl_NewDictObj(); @@ -500,12 +507,12 @@ FillEncodingFileMap() Tcl_ListObjGetElements(NULL, matchFileList, &numFiles, &filev); for (j=0; j<numFiles; j++) { - Tcl_Obj *encodingName, *file; + Tcl_Obj *encodingName, *fileObj; - file = TclPathPart(NULL, filev[j], TCL_PATH_TAIL); - encodingName = TclPathPart(NULL, file, TCL_PATH_ROOT); + fileObj = TclPathPart(NULL, filev[j], TCL_PATH_TAIL); + encodingName = TclPathPart(NULL, fileObj, TCL_PATH_ROOT); Tcl_DictObjPut(NULL, map, encodingName, directory); - Tcl_DecrRefCount(file); + Tcl_DecrRefCount(fileObj); Tcl_DecrRefCount(encodingName); } Tcl_DecrRefCount(matchFileList); @@ -534,9 +541,12 @@ FillEncodingFileMap() */ void -TclInitEncodingSubsystem() +TclInitEncodingSubsystem(void) { Tcl_EncodingType type; + TableEncodingData *dataPtr; + unsigned size; + unsigned short i; if (encodingsInitialized) { return; @@ -547,7 +557,7 @@ TclInitEncodingSubsystem() Tcl_MutexUnlock(&encodingMutex); /* - * Create a few initial encodings. Note that the UTF-8 to UTF-8 + * Create a few initial encodings. Note that the UTF-8 to UTF-8 * translation is not a no-op, because it will turn a stream of improperly * formed UTF-8 into a properly formed stream. 
*/ @@ -558,9 +568,7 @@ TclInitEncodingSubsystem() type.freeProc = NULL; type.nullSize = 1; type.clientData = NULL; - - defaultEncoding = Tcl_CreateEncoding(&type); - systemEncoding = Tcl_GetEncoding(NULL, type.encodingName); + tclIdentityEncoding = Tcl_CreateEncoding(&type); type.encodingName = "utf-8"; type.toUtfProc = UtfExtToUtfIntProc; @@ -578,6 +586,44 @@ TclInitEncodingSubsystem() type.clientData = NULL; Tcl_CreateEncoding(&type); + /* + * Need the iso8859-1 encoding in order to process binary data, so force + * it to always be embedded. Note that this encoding *must* be a proper + * table encoding or some of the escape encodings crash! Hence the ugly + * code to duplicate the structure of a table encoding here. + */ + + dataPtr = ckalloc(sizeof(TableEncodingData)); + memset(dataPtr, 0, sizeof(TableEncodingData)); + dataPtr->fallback = '?'; + + size = 256*(sizeof(unsigned short *) + sizeof(unsigned short)); + dataPtr->toUnicode = ckalloc(size); + memset(dataPtr->toUnicode, 0, size); + dataPtr->fromUnicode = ckalloc(size); + memset(dataPtr->fromUnicode, 0, size); + + dataPtr->toUnicode[0] = (unsigned short *) (dataPtr->toUnicode + 256); + dataPtr->fromUnicode[0] = (unsigned short *) (dataPtr->fromUnicode + 256); + for (i=1 ; i<256 ; i++) { + dataPtr->toUnicode[i] = emptyPage; + dataPtr->fromUnicode[i] = emptyPage; + } + + for (i=0 ; i<256 ; i++) { + dataPtr->toUnicode[0][i] = i; + dataPtr->fromUnicode[0][i] = i; + } + + type.encodingName = "iso8859-1"; + type.toUtfProc = Iso88591ToUtfProc; + type.fromUtfProc = Iso88591FromUtfProc; + type.freeProc = TableFreeProc; + type.nullSize = 1; + type.clientData = dataPtr; + defaultEncoding = Tcl_CreateEncoding(&type); + systemEncoding = Tcl_GetEncoding(NULL, type.encodingName); + encodingsInitialized = 1; } @@ -598,7 +644,7 @@ TclInitEncodingSubsystem() */ void -TclFinalizeEncodingSubsystem() +TclFinalizeEncodingSubsystem(void) { Tcl_HashSearch search; Tcl_HashEntry *hPtr; @@ -606,17 +652,18 @@ 
TclFinalizeEncodingSubsystem() Tcl_MutexLock(&encodingMutex); encodingsInitialized = 0; FreeEncoding(systemEncoding); + FreeEncoding(tclIdentityEncoding); hPtr = Tcl_FirstHashEntry(&encodingTable, &search); while (hPtr != NULL) { /* * Call FreeEncoding instead of doing it directly to handle refcounts - * like escape encodings use. [Bug #524674] Make sure to call + * like escape encodings use. [Bug 524674] Make sure to call * Tcl_FirstHashEntry repeatedly so that all encodings are eventually * cleaned up. */ - FreeEncoding((Tcl_Encoding) Tcl_GetHashValue(hPtr)); + FreeEncoding(Tcl_GetHashValue(hPtr)); hPtr = Tcl_FirstHashEntry(&encodingTable, &search); } @@ -629,24 +676,24 @@ TclFinalizeEncodingSubsystem() * * Tcl_GetDefaultEncodingDir -- * - * Legacy public interface to retrieve first directory in the encoding - * searchPath. + * Legacy public interface to retrieve first directory in the encoding + * searchPath. * * Results: * The directory pathname, as a string, or NULL for an empty encoding * search path. * * Side effects: - * None. + * None. * *------------------------------------------------------------------------- */ -CONST char * -Tcl_GetDefaultEncodingDir() +const char * +Tcl_GetDefaultEncodingDir(void) { int numDirs; - Tcl_Obj *first, *searchPath = TclGetEncodingSearchPath(); + Tcl_Obj *first, *searchPath = Tcl_GetEncodingSearchPath(); Tcl_ListObjLength(NULL, searchPath, &numDirs); if (numDirs == 0) { @@ -662,28 +709,28 @@ Tcl_GetDefaultEncodingDir() * * Tcl_SetDefaultEncodingDir -- * - * Legacy public interface to set the first directory in the encoding - * search path. + * Legacy public interface to set the first directory in the encoding + * search path. * * Results: - * None. + * None. * * Side effects: - * Modifies the encoding search path. + * Modifies the encoding search path. 
* *------------------------------------------------------------------------- */ void -Tcl_SetDefaultEncodingDir(path) - CONST char *path; +Tcl_SetDefaultEncodingDir( + const char *path) { - Tcl_Obj *searchPath = TclGetEncodingSearchPath(); + Tcl_Obj *searchPath = Tcl_GetEncodingSearchPath(); Tcl_Obj *directory = Tcl_NewStringObj(path, -1); searchPath = Tcl_DuplicateObj(searchPath); Tcl_ListObjReplace(NULL, searchPath, 0, 0, 1, &directory); - TclSetEncodingSearchPath(searchPath); + Tcl_SetEncodingSearchPath(searchPath); } /* @@ -692,19 +739,19 @@ Tcl_SetDefaultEncodingDir(path) * Tcl_GetEncoding -- * * Given the name of a encoding, find the corresponding Tcl_Encoding - * token. If the encoding did not already exist, Tcl attempts to + * token. If the encoding did not already exist, Tcl attempts to * dynamically load an encoding by that name. * * Results: - * Returns a token that represents the encoding. If the name didn't - * refer to any known or loadable encoding, NULL is returned. If NULL - * was returned, an error message is left in interp's result object, - * unless interp was NULL. + * Returns a token that represents the encoding. If the name didn't refer + * to any known or loadable encoding, NULL is returned. If NULL was + * returned, an error message is left in interp's result object, unless + * interp was NULL. * * Side effects: * The new encoding type is entered into a table visible to all - * interpreters, keyed off the encoding's name. For each call to this - * procedure, there should eventually be a call to Tcl_FreeEncoding, so + * interpreters, keyed off the encoding's name. For each call to this + * function, there should eventually be a call to Tcl_FreeEncoding, so * that the database can be cleaned up when encodings aren't needed * anymore. * @@ -712,9 +759,9 @@ Tcl_SetDefaultEncodingDir(path) */ Tcl_Encoding -Tcl_GetEncoding(interp, name) - Tcl_Interp *interp; /* Interp for error reporting, if not NULL. 
*/ - CONST char *name; /* The name of the desired encoding. */ +Tcl_GetEncoding( + Tcl_Interp *interp, /* Interp for error reporting, if not NULL. */ + const char *name) /* The name of the desired encoding. */ { Tcl_HashEntry *hPtr; Encoding *encodingPtr; @@ -729,7 +776,7 @@ Tcl_GetEncoding(interp, name) hPtr = Tcl_FindHashEntry(&encodingTable, name); if (hPtr != NULL) { - encodingPtr = (Encoding *) Tcl_GetHashValue(hPtr); + encodingPtr = Tcl_GetHashValue(hPtr); encodingPtr->refCount++; Tcl_MutexUnlock(&encodingMutex); return (Tcl_Encoding) encodingPtr; @@ -744,7 +791,7 @@ Tcl_GetEncoding(interp, name) * * Tcl_FreeEncoding -- * - * This procedure is called to release an encoding allocated by + * This function is called to release an encoding allocated by * Tcl_CreateEncoding() or Tcl_GetEncoding(). * * Results: @@ -758,8 +805,8 @@ Tcl_GetEncoding(interp, name) */ void -Tcl_FreeEncoding(encoding) - Tcl_Encoding encoding; +Tcl_FreeEncoding( + Tcl_Encoding encoding) { Tcl_MutexLock(&encodingMutex); FreeEncoding(encoding); @@ -771,7 +818,7 @@ Tcl_FreeEncoding(encoding) * * FreeEncoding -- * - * This procedure is called to release an encoding by procedures that + * This function is called to release an encoding by functions that * already have the encodingMutex. 
* * Results: @@ -785,25 +832,27 @@ Tcl_FreeEncoding(encoding) */ static void -FreeEncoding(encoding) - Tcl_Encoding encoding; +FreeEncoding( + Tcl_Encoding encoding) { - Encoding *encodingPtr; + Encoding *encodingPtr = (Encoding *) encoding; - encodingPtr = (Encoding *) encoding; if (encodingPtr == NULL) { return; } + if (encodingPtr->refCount<=0) { + Tcl_Panic("FreeEncoding: refcount problem !!!"); + } encodingPtr->refCount--; if (encodingPtr->refCount == 0) { if (encodingPtr->freeProc != NULL) { - (*encodingPtr->freeProc)(encodingPtr->clientData); + encodingPtr->freeProc(encodingPtr->clientData); } if (encodingPtr->hPtr != NULL) { Tcl_DeleteHashEntry(encodingPtr->hPtr); } - ckfree((char *) encodingPtr->name); - ckfree((char *) encodingPtr); + ckfree(encodingPtr->name); + ckfree(encodingPtr); } } @@ -824,9 +873,9 @@ FreeEncoding(encoding) *--------------------------------------------------------------------------- */ -CONST char * -Tcl_GetEncodingName(encoding) - Tcl_Encoding encoding; /* The encoding whose name to fetch. */ +const char * +Tcl_GetEncodingName( + Tcl_Encoding encoding) /* The encoding whose name to fetch. */ { if (encoding == NULL) { encoding = systemEncoding; @@ -854,8 +903,8 @@ Tcl_GetEncodingName(encoding) */ void -Tcl_GetEncodingNames(interp) - Tcl_Interp *interp; /* Interp to hold result. */ +Tcl_GetEncodingNames( + Tcl_Interp *interp) /* Interp to hold result. 
*/ { Tcl_HashTable table; Tcl_HashSearch search; @@ -873,9 +922,10 @@ Tcl_GetEncodingNames(interp) Tcl_MutexLock(&encodingMutex); for (hPtr = Tcl_FirstHashEntry(&encodingTable, &search); hPtr != NULL; hPtr = Tcl_NextHashEntry(&search)) { - Encoding *encodingPtr = (Encoding *) Tcl_GetHashValue(hPtr); + Encoding *encodingPtr = Tcl_GetHashValue(hPtr); + Tcl_CreateHashEntry(&table, - (char *) Tcl_NewStringObj(encodingPtr->name, -1), &dummy); + Tcl_NewStringObj(encodingPtr->name, -1), &dummy); } Tcl_MutexUnlock(&encodingMutex); @@ -888,7 +938,7 @@ Tcl_GetEncodingNames(interp) Tcl_DictObjFirst(NULL, map, &mapSearch, &name, NULL, &done); for (; !done; Tcl_DictObjNext(&mapSearch, &name, NULL, &done)) { - Tcl_CreateHashEntry(&table, (char *) name, &dummy); + Tcl_CreateHashEntry(&table, name, &dummy); } /* @@ -910,18 +960,18 @@ Tcl_GetEncodingNames(interp) * Tcl_SetSystemEncoding -- * * Sets the default encoding that should be used whenever the user passes - * a NULL value in to one of the conversion routines. If the supplied + * a NULL value in to one of the conversion routines. If the supplied * name is NULL, the system encoding is reset to the default system * encoding. * * Results: * The return value is TCL_OK if the system encoding was successfully set - * to the encoding specified by name, TCL_ERROR otherwise. If TCL_ERROR + * to the encoding specified by name, TCL_ERROR otherwise. If TCL_ERROR * is returned, an error message is left in interp's result object, * unless interp was NULL. * * Side effects: - * The reference count of the new system encoding is incremented. The + * The reference count of the new system encoding is incremented. The * reference count of the old system encoding is decremented and it may * be freed. * @@ -929,15 +979,15 @@ Tcl_GetEncodingNames(interp) */ int -Tcl_SetSystemEncoding(interp, name) - Tcl_Interp *interp; /* Interp for error reporting, if not NULL. 
*/ - CONST char *name; /* The name of the desired encoding, or NULL +Tcl_SetSystemEncoding( + Tcl_Interp *interp, /* Interp for error reporting, if not NULL. */ + const char *name) /* The name of the desired encoding, or NULL/"" * to reset to default encoding. */ { Tcl_Encoding encoding; Encoding *encodingPtr; - if (name == NULL) { + if (!name || !*name) { Tcl_MutexLock(&encodingMutex); encoding = defaultEncoding; encodingPtr = (Encoding *) encoding; @@ -963,21 +1013,21 @@ Tcl_SetSystemEncoding(interp, name) * * Tcl_CreateEncoding -- * - * This procedure is called to define a new encoding and the procedures + * This function is called to define a new encoding and the functions * that are used to convert between the specified encoding and Unicode. * * Results: - * Returns a token that represents the encoding. If an encoding with the + * Returns a token that represents the encoding. If an encoding with the * same name already existed, the old encoding token remains valid and * continues to behave as it used to, and will eventually be garbage - * collected when the last reference to it goes away. Any subsequent + * collected when the last reference to it goes away. Any subsequent * calls to Tcl_GetEncoding with the specified name will retrieve the * most recent encoding token. * * Side effects: * The new encoding type is entered into a table visible to all - * interpreters, keyed off the encoding's name. For each call to this - * procedure, there should eventually be a call to Tcl_FreeEncoding, so + * interpreters, keyed off the encoding's name. For each call to this + * function, there should eventually be a call to Tcl_FreeEncoding, so * that the database can be cleaned up when encodings aren't needed * anymore. * @@ -985,29 +1035,30 @@ Tcl_SetSystemEncoding(interp, name) */ Tcl_Encoding -Tcl_CreateEncoding(typePtr) - Tcl_EncodingType *typePtr; /* The encoding type. */ +Tcl_CreateEncoding( + const Tcl_EncodingType *typePtr) + /* The encoding type. 
*/ { Tcl_HashEntry *hPtr; - int new; + int isNew; Encoding *encodingPtr; char *name; Tcl_MutexLock(&encodingMutex); - hPtr = Tcl_CreateHashEntry(&encodingTable, typePtr->encodingName, &new); - if (new == 0) { + hPtr = Tcl_CreateHashEntry(&encodingTable, typePtr->encodingName, &isNew); + if (isNew == 0) { /* * Remove old encoding from hash table, but don't delete it until last * reference goes away. */ - encodingPtr = (Encoding *) Tcl_GetHashValue(hPtr); + encodingPtr = Tcl_GetHashValue(hPtr); encodingPtr->hPtr = NULL; } - name = ckalloc((unsigned) strlen(typePtr->encodingName) + 1); + name = ckalloc(strlen(typePtr->encodingName) + 1); - encodingPtr = (Encoding *) ckalloc(sizeof(Encoding)); + encodingPtr = ckalloc(sizeof(Encoding)); encodingPtr->name = strcpy(name, typePtr->encodingName); encodingPtr->toUtfProc = typePtr->toUtfProc; encodingPtr->fromUtfProc = typePtr->fromUtfProc; @@ -1050,18 +1101,18 @@ Tcl_CreateEncoding(typePtr) */ char * -Tcl_ExternalToUtfDString(encoding, src, srcLen, dstPtr) - Tcl_Encoding encoding; /* The encoding for the source string, or NULL +Tcl_ExternalToUtfDString( + Tcl_Encoding encoding, /* The encoding for the source string, or NULL * for the default system encoding. */ - CONST char *src; /* Source string in specified encoding. */ - int srcLen; /* Source string length in bytes, or < 0 for + const char *src, /* Source string in specified encoding. */ + int srcLen, /* Source string length in bytes, or < 0 for * encoding-specific string length. */ - Tcl_DString *dstPtr; /* Uninitialized or free DString in which the + Tcl_DString *dstPtr) /* Uninitialized or free DString in which the * converted string is stored. 
*/ { char *dst; Tcl_EncodingState state; - Encoding *encodingPtr; + const Encoding *encodingPtr; int flags, dstLen, result, soFar, srcRead, dstWrote, dstChars; Tcl_DStringInit(dstPtr); @@ -1076,15 +1127,14 @@ Tcl_ExternalToUtfDString(encoding, src, srcLen, dstPtr) if (src == NULL) { srcLen = 0; } else if (srcLen < 0) { - srcLen = (*encodingPtr->lengthProc)(src); + srcLen = encodingPtr->lengthProc(src); } flags = TCL_ENCODING_START | TCL_ENCODING_END; while (1) { - result = (*encodingPtr->toUtfProc)(encodingPtr->clientData, src, - srcLen, flags, &state, dst, dstLen, &srcRead, &dstWrote, - &dstChars); + result = encodingPtr->toUtfProc(encodingPtr->clientData, src, srcLen, + flags, &state, dst, dstLen, &srcRead, &dstWrote, &dstChars); soFar = dst + dstWrote - Tcl_DStringValue(dstPtr); if (result != TCL_CONVERT_NOSPACE) { @@ -1123,37 +1173,36 @@ Tcl_ExternalToUtfDString(encoding, src, srcLen, dstPtr) */ int -Tcl_ExternalToUtf(interp, encoding, src, srcLen, flags, statePtr, dst, - dstLen, srcReadPtr, dstWrotePtr, dstCharsPtr) - Tcl_Interp *interp; /* Interp for error return, if not NULL. */ - Tcl_Encoding encoding; /* The encoding for the source string, or NULL +Tcl_ExternalToUtf( + Tcl_Interp *interp, /* Interp for error return, if not NULL. */ + Tcl_Encoding encoding, /* The encoding for the source string, or NULL * for the default system encoding. */ - CONST char *src; /* Source string in specified encoding. */ - int srcLen; /* Source string length in bytes, or < 0 for + const char *src, /* Source string in specified encoding. */ + int srcLen, /* Source string length in bytes, or < 0 for * encoding-specific string length. */ - int flags; /* Conversion control flags. */ - Tcl_EncodingState *statePtr;/* Place for conversion routine to store state + int flags, /* Conversion control flags. */ + Tcl_EncodingState *statePtr,/* Place for conversion routine to store state * information used during a piecewise - * conversion. Contents of statePtr are + * conversion. 
Contents of statePtr are * initialized and/or reset by conversion * routine under control of flags argument. */ - char *dst; /* Output buffer in which converted string is + char *dst, /* Output buffer in which converted string is * stored. */ - int dstLen; /* The maximum length of output buffer in + int dstLen, /* The maximum length of output buffer in * bytes. */ - int *srcReadPtr; /* Filled with the number of bytes from the - * source string that were converted. This - * may be less than the original source length - * if there was a problem converting some - * source characters. */ - int *dstWrotePtr; /* Filled with the number of bytes that were + int *srcReadPtr, /* Filled with the number of bytes from the + * source string that were converted. This may + * be less than the original source length if + * there was a problem converting some source + * characters. */ + int *dstWrotePtr, /* Filled with the number of bytes that were * stored in the output buffer as a result of * the conversion. */ - int *dstCharsPtr; /* Filled with the number of characters that + int *dstCharsPtr) /* Filled with the number of characters that * correspond to the bytes stored in the * output buffer. 
*/ { - Encoding *encodingPtr; + const Encoding *encodingPtr; int result, srcRead, dstWrote, dstChars; Tcl_EncodingState state; @@ -1165,7 +1214,7 @@ Tcl_ExternalToUtf(interp, encoding, src, srcLen, flags, statePtr, dst, if (src == NULL) { srcLen = 0; } else if (srcLen < 0) { - srcLen = (*encodingPtr->lengthProc)(src); + srcLen = encodingPtr->lengthProc(src); } if (statePtr == NULL) { flags |= TCL_ENCODING_START | TCL_ENCODING_END; @@ -1188,7 +1237,7 @@ Tcl_ExternalToUtf(interp, encoding, src, srcLen, flags, statePtr, dst, */ dstLen--; - result = (*encodingPtr->toUtfProc)(encodingPtr->clientData, src, srcLen, + result = encodingPtr->toUtfProc(encodingPtr->clientData, src, srcLen, flags, statePtr, dst, dstLen, srcReadPtr, dstWrotePtr, dstCharsPtr); dst[*dstWrotePtr] = '\0'; @@ -1201,14 +1250,14 @@ Tcl_ExternalToUtf(interp, encoding, src, srcLen, flags, statePtr, dst, * * Tcl_UtfToExternalDString -- * - * Convert a source buffer from UTF-8 into the specified encoding. If - * any of the bytes in the source buffer are invalid or cannot be - * represented in the target encoding, a default fallback character will - * be substituted. + * Convert a source buffer from UTF-8 into the specified encoding. If any + * of the bytes in the source buffer are invalid or cannot be represented + * in the target encoding, a default fallback character will be + * substituted. * * Results: * The converted bytes are stored in the DString, which is then NULL - * terminated in an encoding-specific manner. The return value is a + * terminated in an encoding-specific manner. The return value is a * pointer to the value stored in the DString. 
* * Side effects: @@ -1218,18 +1267,18 @@ Tcl_ExternalToUtf(interp, encoding, src, srcLen, flags, statePtr, dst, */ char * -Tcl_UtfToExternalDString(encoding, src, srcLen, dstPtr) - Tcl_Encoding encoding; /* The encoding for the converted string, or +Tcl_UtfToExternalDString( + Tcl_Encoding encoding, /* The encoding for the converted string, or * NULL for the default system encoding. */ - CONST char *src; /* Source string in UTF-8. */ - int srcLen; /* Source string length in bytes, or < 0 for + const char *src, /* Source string in UTF-8. */ + int srcLen, /* Source string length in bytes, or < 0 for * strlen(). */ - Tcl_DString *dstPtr; /* Uninitialized or free DString in which the + Tcl_DString *dstPtr) /* Uninitialized or free DString in which the * converted string is stored. */ { char *dst; Tcl_EncodingState state; - Encoding *encodingPtr; + const Encoding *encodingPtr; int flags, dstLen, result, soFar, srcRead, dstWrote, dstChars; Tcl_DStringInit(dstPtr); @@ -1248,7 +1297,7 @@ Tcl_UtfToExternalDString(encoding, src, srcLen, dstPtr) } flags = TCL_ENCODING_START | TCL_ENCODING_END; while (1) { - result = (*encodingPtr->fromUtfProc)(encodingPtr->clientData, src, + result = encodingPtr->fromUtfProc(encodingPtr->clientData, src, srcLen, flags, &state, dst, dstLen, &srcRead, &dstWrote, &dstChars); soFar = dst + dstWrote - Tcl_DStringValue(dstPtr); @@ -1292,37 +1341,36 @@ Tcl_UtfToExternalDString(encoding, src, srcLen, dstPtr) */ int -Tcl_UtfToExternal(interp, encoding, src, srcLen, flags, statePtr, dst, - dstLen, srcReadPtr, dstWrotePtr, dstCharsPtr) - Tcl_Interp *interp; /* Interp for error return, if not NULL. */ - Tcl_Encoding encoding; /* The encoding for the converted string, or +Tcl_UtfToExternal( + Tcl_Interp *interp, /* Interp for error return, if not NULL. */ + Tcl_Encoding encoding, /* The encoding for the converted string, or * NULL for the default system encoding. */ - CONST char *src; /* Source string in UTF-8. 
*/ - int srcLen; /* Source string length in bytes, or < 0 for + const char *src, /* Source string in UTF-8. */ + int srcLen, /* Source string length in bytes, or < 0 for * strlen(). */ - int flags; /* Conversion control flags. */ - Tcl_EncodingState *statePtr;/* Place for conversion routine to store state + int flags, /* Conversion control flags. */ + Tcl_EncodingState *statePtr,/* Place for conversion routine to store state * information used during a piecewise - * conversion. Contents of statePtr are + * conversion. Contents of statePtr are * initialized and/or reset by conversion * routine under control of flags argument. */ - char *dst; /* Output buffer in which converted string + char *dst, /* Output buffer in which converted string * is stored. */ - int dstLen; /* The maximum length of output buffer in + int dstLen, /* The maximum length of output buffer in * bytes. */ - int *srcReadPtr; /* Filled with the number of bytes from the - * source string that were converted. This - * may be less than the original source length - * if there was a problem converting some - * source characters. */ - int *dstWrotePtr; /* Filled with the number of bytes that were + int *srcReadPtr, /* Filled with the number of bytes from the + * source string that were converted. This may + * be less than the original source length if + * there was a problem converting some source + * characters. */ + int *dstWrotePtr, /* Filled with the number of bytes that were * stored in the output buffer as a result of * the conversion. */ - int *dstCharsPtr; /* Filled with the number of characters that + int *dstCharsPtr) /* Filled with the number of characters that * correspond to the bytes stored in the * output buffer. 
*/ { - Encoding *encodingPtr; + const Encoding *encodingPtr; int result, srcRead, dstWrote, dstChars; Tcl_EncodingState state; @@ -1351,7 +1399,7 @@ Tcl_UtfToExternal(interp, encoding, src, srcLen, flags, statePtr, dst, } dstLen -= encodingPtr->nullSize; - result = (*encodingPtr->fromUtfProc)(encodingPtr->clientData, src, srcLen, + result = encodingPtr->fromUtfProc(encodingPtr->clientData, src, srcLen, flags, statePtr, dst, dstLen, srcReadPtr, dstWrotePtr, dstCharsPtr); if (encodingPtr->nullSize == 2) { @@ -1367,7 +1415,7 @@ Tcl_UtfToExternal(interp, encoding, src, srcLen, flags, statePtr, dst, * * Tcl_FindExecutable -- * - * This procedure computes the absolute path name of the current + * This function computes the absolute path name of the current * application, given its argv[0] value. * * Results: @@ -1379,10 +1427,10 @@ Tcl_UtfToExternal(interp, encoding, src, srcLen, flags, statePtr, dst, * *--------------------------------------------------------------------------- */ - +#undef Tcl_FindExecutable void -Tcl_FindExecutable(argv0) - CONST char *argv0; /* The value of the application's argv[0] +Tcl_FindExecutable( + const char *argv0) /* The value of the application's argv[0] * (native). */ { TclInitSubsystems(); @@ -1398,27 +1446,26 @@ Tcl_FindExecutable(argv0) * Open the file believed to hold data for the encoding, "name". * * Results: - * Returns the readable Tcl_Channel from opening the file, or NULL if the - * file could not be successfully opened. If NULL was * returned, an - * error message is left in interp's result object, * unless interp was - * NULL. + * Returns the readable Tcl_Channel from opening the file, or NULL if the + * file could not be successfully opened. If NULL was returned, an error + * message is left in interp's result object, unless interp was NULL. * * Side effects: - * Channel may be opened. Information about the filesystem may be cached + * Channel may be opened. 
Information about the filesystem may be cached * to speed later calls. * *--------------------------------------------------------------------------- */ static Tcl_Channel -OpenEncodingFileChannel(interp, name) - Tcl_Interp *interp; /* Interp for error reporting, if not NULL. */ - CONST char *name; /* The name of the encoding file on disk and +OpenEncodingFileChannel( + Tcl_Interp *interp, /* Interp for error reporting, if not NULL. */ + const char *name) /* The name of the encoding file on disk and * also the name for new encoding. */ { Tcl_Obj *nameObj = Tcl_NewStringObj(name, -1); Tcl_Obj *fileNameObj = Tcl_DuplicateObj(nameObj); - Tcl_Obj *searchPath = Tcl_DuplicateObj(TclGetEncodingSearchPath()); + Tcl_Obj *searchPath = Tcl_DuplicateObj(Tcl_GetEncodingSearchPath()); Tcl_Obj *map = TclGetProcessGlobalValue(&encodingFileMap); Tcl_Obj **dir, *path, *directory = NULL; Tcl_Channel chan = NULL; @@ -1443,7 +1490,8 @@ OpenEncodingFileChannel(interp, name) } } if (!verified) { - CONST char *dirString = Tcl_GetString(directory); + const char *dirString = Tcl_GetString(directory); + for (i=0; i<numDirs && !verified; i++) { if (strcmp(dirString, Tcl_GetString(dir[i])) == 0) { verified = 1; @@ -1452,7 +1500,7 @@ OpenEncodingFileChannel(interp, name) } if (!verified) { /* - * Directory no longer on the search path. Remove from cache. + * Directory no longer on the search path. Remove from cache. */ map = Tcl_DuplicateObj(map); @@ -1464,7 +1512,7 @@ OpenEncodingFileChannel(interp, name) if (NULL != directory) { /* - * Got a directory from the cache. Try to use it first. + * Got a directory from the cache. Try to use it first. 
*/ Tcl_IncrRefCount(directory); @@ -1496,7 +1544,9 @@ OpenEncodingFileChannel(interp, name) } if ((NULL == chan) && (interp != NULL)) { - Tcl_AppendResult(interp, "unknown encoding \"", name, "\"", NULL); + Tcl_SetObjResult(interp, Tcl_ObjPrintf( + "unknown encoding \"%s\"", name)); + Tcl_SetErrorCode(interp, "TCL", "LOOKUP", "ENCODING", name, NULL); } Tcl_DecrRefCount(fileNameObj); Tcl_DecrRefCount(nameObj); @@ -1515,7 +1565,7 @@ OpenEncodingFileChannel(interp, name) * * Results: * The return value is the newly loaded Encoding, or NULL if the file - * didn't exist of was in the incorrect format. If NULL was returned, an + * didn't exist or was in the incorrect format. If NULL was returned, an * error message is left in interp's result object, unless interp was * NULL. * @@ -1526,9 +1576,9 @@ OpenEncodingFileChannel(interp, name) */ static Tcl_Encoding -LoadEncodingFile(interp, name) - Tcl_Interp *interp; /* Interp for error reporting, if not NULL. */ - CONST char *name; /* The name of the encoding file on disk and +LoadEncodingFile( + Tcl_Interp *interp, /* Interp for error reporting, if not NULL. */ + const char *name) /* The name of the encoding file on disk and * also the name for new encoding. */ { Tcl_Channel chan = NULL; @@ -1569,7 +1619,9 @@ LoadEncodingFile(interp, name) break; } if ((encoding == NULL) && (interp != NULL)) { - Tcl_AppendResult(interp, "invalid encoding file \"", name, "\"", NULL); + Tcl_SetObjResult(interp, Tcl_ObjPrintf( + "invalid encoding file \"%s\"", name)); + Tcl_SetErrorCode(interp, "TCL", "LOOKUP", "ENCODING", name, NULL); } Tcl_Close(NULL, chan); @@ -1581,7 +1633,7 @@ LoadEncodingFile(interp, name) * * LoadTableEncoding -- * - * Helper function for LoadEncodingTable(). Loads a table to that + * Helper function for LoadEncodingTable(). Loads a table that * converts between Unicode and some other encoding and creates an * encoding (using a TableEncoding structure) from that information. 
* @@ -1600,19 +1652,19 @@ LoadEncodingFile(interp, name) */ static Tcl_Encoding -LoadTableEncoding(name, type, chan) - CONST char *name; /* Name for new encoding. */ - int type; /* Type of encoding (ENCODING_?????). */ - Tcl_Channel chan; /* File containing new encoding. */ +LoadTableEncoding( + const char *name, /* Name for new encoding. */ + int type, /* Type of encoding (ENCODING_?????). */ + Tcl_Channel chan) /* File containing new encoding. */ { Tcl_DString lineString; Tcl_Obj *objPtr; char *line; - int i, hi, lo, numPages, symbol, fallback; + int i, hi, lo, numPages, symbol, fallback, len; unsigned char used[256]; - unsigned int size; + unsigned size; TableEncodingData *dataPtr; - unsigned short *pageMemPtr; + unsigned short *pageMemPtr, *page; Tcl_EncodingType encType; /* @@ -1620,7 +1672,7 @@ LoadTableEncoding(name, type, chan) * sequences in the encoding files. */ - static char staticHex[] = { + static const char staticHex[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0 ... 15 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 16 ... 31 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 32 ... 47 */ @@ -1659,19 +1711,19 @@ LoadTableEncoding(name, type, chan) #undef PAGESIZE #define PAGESIZE (256 * sizeof(unsigned short)) - dataPtr = (TableEncodingData *) ckalloc(sizeof(TableEncodingData)); + dataPtr = ckalloc(sizeof(TableEncodingData)); memset(dataPtr, 0, sizeof(TableEncodingData)); dataPtr->fallback = fallback; /* - * Read the table that maps characters to Unicode. Performs a single + * Read the table that maps characters to Unicode. Performs a single * malloc to get the memory for the array and all the pages needed by the * array. 
*/ size = 256 * sizeof(unsigned short *) + numPages * PAGESIZE; - dataPtr->toUnicode = (unsigned short **) ckalloc(size); + dataPtr->toUnicode = ckalloc(size); memset(dataPtr->toUnicode, 0, size); pageMemPtr = (unsigned short *) (dataPtr->toUnicode + 256); @@ -1679,7 +1731,7 @@ LoadTableEncoding(name, type, chan) Tcl_IncrRefCount(objPtr); for (i = 0; i < numPages; i++) { int ch; - char *p; + const char *p; Tcl_ReadChars(chan, objPtr, 3 + 16 * (16 * 4 + 1), 0); p = Tcl_GetString(objPtr); @@ -1713,9 +1765,9 @@ LoadTableEncoding(name, type, chan) } /* - * Invert toUnicode array to produce the fromUnicode array. Performs a + * Invert toUnicode array to produce the fromUnicode array. Performs a * single malloc to get the memory for the array and all the pages needed - * by the array. While reading in the toUnicode array, we remembered what + * by the array. While reading in the toUnicode array, we remembered what * pages that would be needed for the fromUnicode array. */ @@ -1729,36 +1781,33 @@ LoadTableEncoding(name, type, chan) } } size = 256 * sizeof(unsigned short *) + numPages * PAGESIZE; - dataPtr->fromUnicode = (unsigned short **) ckalloc(size); + dataPtr->fromUnicode = ckalloc(size); memset(dataPtr->fromUnicode, 0, size); pageMemPtr = (unsigned short *) (dataPtr->fromUnicode + 256); for (hi = 0; hi < 256; hi++) { if (dataPtr->toUnicode[hi] == NULL) { dataPtr->toUnicode[hi] = emptyPage; - } else { - for (lo = 0; lo < 256; lo++) { - int ch; - - ch = dataPtr->toUnicode[hi][lo]; - if (ch != 0) { - unsigned short *page; - - page = dataPtr->fromUnicode[ch >> 8]; - if (page == NULL) { - page = pageMemPtr; - pageMemPtr += 256; - dataPtr->fromUnicode[ch >> 8] = page; - } - page[ch & 0xff] = (unsigned short) ((hi << 8) + lo); + continue; + } + for (lo = 0; lo < 256; lo++) { + int ch = dataPtr->toUnicode[hi][lo]; + + if (ch != 0) { + page = dataPtr->fromUnicode[ch >> 8]; + if (page == NULL) { + page = pageMemPtr; + pageMemPtr += 256; + dataPtr->fromUnicode[ch >> 8] = page; 
} + page[ch & 0xff] = (unsigned short) ((hi << 8) + lo); } } } if (type == ENCODING_MULTIBYTE) { /* * If multibyte encodings don't have a backslash character, define - * one. Otherwise, on Windows, native file names won't work because + * one. Otherwise, on Windows, native file names won't work because * the backslash in the file name will map to the unknown character * (question mark) when converting from UTF-8 to external encoding. */ @@ -1770,8 +1819,6 @@ LoadTableEncoding(name, type, chan) } } if (symbol) { - unsigned short *page; - /* * Make a special symbol encoding that not only maps the symbol * characters from their Unicode code points down into page 0, but @@ -1779,7 +1826,7 @@ LoadTableEncoding(name, type, chan) * is so that a symbol font can be used to display a simple string * like "abcd" and have alpha, beta, chi, delta show up, rather than * have "unknown" chars show up because strictly speaking the symbol - * font doesn't have glyphs for those low ascii chars. + * font doesn't have glyphs for those low ASCII chars. */ page = dataPtr->fromUnicode[0]; @@ -1800,60 +1847,81 @@ LoadTableEncoding(name, type, chan) } /* - * For trailing 'R'everse encoding, see [Patch #689341] + * For trailing 'R'everse encoding, see [Patch 689341] */ Tcl_DStringInit(&lineString); - do { - int len; + + /* + * Skip leading empty lines. + */ + + while ((len = Tcl_Gets(chan, &lineString)) == 0) { + /* empty body */ + } + if (len < 0) { + goto doneParse; + } + + /* + * Require that it starts with an 'R'. + */ + + line = Tcl_DStringValue(&lineString); + if (line[0] != 'R') { + goto doneParse; + } + + /* + * Read lines from the encoding until EOF. + */ + + for (TclDStringClear(&lineString); + (len = Tcl_Gets(chan, &lineString)) >= 0; + TclDStringClear(&lineString)) { + const unsigned char *p; + int to, from; /* - * Skip leading empty lines. + * Skip short lines. 
*/ - while ((len = Tcl_Gets(chan, &lineString)) == 0) - ; - - if (len < 0) { - break; - } - line = Tcl_DStringValue(&lineString); - if (line[0] != 'R') { - break; + if (len < 5) { + continue; } - for (Tcl_DStringSetLength(&lineString, 0); - (len = Tcl_Gets(chan, &lineString)) >= 0; - Tcl_DStringSetLength(&lineString, 0)) { - unsigned char* p; - int to, from; - if (len < 5) { - continue; - } - p = (unsigned char*) Tcl_DStringValue(&lineString); - to = (staticHex[p[0]] << 12) + (staticHex[p[1]] << 8) + /* + * Parse the line as a sequence of hex digits. + */ + + p = (const unsigned char *) Tcl_DStringValue(&lineString); + to = (staticHex[p[0]] << 12) + (staticHex[p[1]] << 8) + + (staticHex[p[2]] << 4) + staticHex[p[3]]; + if (to == 0) { + continue; + } + for (p += 5, len -= 5; len >= 0 && *p; p += 5, len -= 5) { + from = (staticHex[p[0]] << 12) + (staticHex[p[1]] << 8) + (staticHex[p[2]] << 4) + staticHex[p[3]]; - if (to == 0) { - continue; - } - for (p += 5, len -= 5; len >= 0 && *p; p += 5, len -= 5) { - from = (staticHex[p[0]] << 12) + (staticHex[p[1]] << 8) - + (staticHex[p[2]] << 4) + staticHex[p[3]]; - if (from == 0) { - continue; - } - dataPtr->fromUnicode[from >> 8][from & 0xff] = to; + if (from == 0) { + continue; } + dataPtr->fromUnicode[from >> 8][from & 0xff] = to; } - } while (0); + } + doneParse: Tcl_DStringFree(&lineString); + /* + * Package everything into an encoding structure. + */ + encType.encodingName = name; encType.toUtfProc = TableToUtfProc; encType.fromUtfProc = TableFromUtfProc; encType.freeProc = TableFreeProc; encType.nullSize = (type == ENCODING_DOUBLEBYTE) ? 2 : 1; - encType.clientData = (ClientData) dataPtr; + encType.clientData = dataPtr; return Tcl_CreateEncoding(&encType); } @@ -1863,7 +1931,7 @@ LoadTableEncoding(name, type, chan) * * LoadEscapeEncoding -- * - * Helper function for LoadEncodingTable(). Loads a state machine that + * Helper function for LoadEncodingTable(). 
Loads a state machine that * converts between Unicode and some other encoding. * * File contains text data that describes the escape sequences that are @@ -1881,12 +1949,12 @@ LoadTableEncoding(name, type, chan) */ static Tcl_Encoding -LoadEscapeEncoding(name, chan) - CONST char *name; /* Name for new encoding. */ - Tcl_Channel chan; /* File containing new encoding. */ +LoadEscapeEncoding( + const char *name, /* Name for new encoding. */ + Tcl_Channel chan) /* File containing new encoding. */ { int i; - unsigned int size; + unsigned size; Tcl_DString escapeData; char init[16], final[16]; EscapeEncodingData *dataPtr; @@ -1898,7 +1966,7 @@ LoadEscapeEncoding(name, chan) while (1) { int argc; - CONST char **argv; + const char **argv; char *line; Tcl_DString lineString; @@ -1908,11 +1976,12 @@ LoadEscapeEncoding(name, chan) } line = Tcl_DStringValue(&lineString); if (Tcl_SplitList(NULL, line, &argc, &argv) != TCL_OK) { + Tcl_DStringFree(&lineString); continue; } if (argc >= 2) { if (strcmp(argv[0], "name") == 0) { - ; + /* do nothing */ } else if (strcmp(argv[0], "init") == 0) { strncpy(init, argv[1], sizeof(init)); init[sizeof(init) - 1] = '\0'; @@ -1921,6 +1990,7 @@ LoadEscapeEncoding(name, chan) final[sizeof(final) - 1] = '\0'; } else { EscapeSubTable est; + Encoding *e; strncpy(est.sequence, argv[1], sizeof(est.sequence)); est.sequence[sizeof(est.sequence) - 1] = '\0'; @@ -1933,26 +2003,30 @@ LoadEscapeEncoding(name, chan) * To avoid infinite recursion in [encoding system iso2022-*] */ - Tcl_GetEncoding(NULL, est.name); - - est.encodingPtr = NULL; + e = (Encoding *) Tcl_GetEncoding(NULL, est.name); + if ((e != NULL) && (e->toUtfProc != TableToUtfProc) + && (e->toUtfProc != Iso88591ToUtfProc)) { + Tcl_FreeEncoding((Tcl_Encoding) e); + e = NULL; + } + est.encodingPtr = e; Tcl_DStringAppend(&escapeData, (char *) &est, sizeof(est)); } } - ckfree((char *) argv); + ckfree(argv); Tcl_DStringFree(&lineString); } size = sizeof(EscapeEncodingData) - sizeof(EscapeSubTable) + 
Tcl_DStringLength(&escapeData); - dataPtr = (EscapeEncodingData *) ckalloc(size); + dataPtr = ckalloc(size); dataPtr->initLen = strlen(init); - strcpy(dataPtr->init, init); + memcpy(dataPtr->init, init, (unsigned) dataPtr->initLen + 1); dataPtr->finalLen = strlen(final); - strcpy(dataPtr->final, final); + memcpy(dataPtr->final, final, (unsigned) dataPtr->finalLen + 1); dataPtr->numSubTables = Tcl_DStringLength(&escapeData) / sizeof(EscapeSubTable); - memcpy((VOID *) dataPtr->subTables, (VOID *) Tcl_DStringValue(&escapeData), + memcpy(dataPtr->subTables, Tcl_DStringValue(&escapeData), (size_t) Tcl_DStringLength(&escapeData)); Tcl_DStringFree(&escapeData); @@ -1967,12 +2041,16 @@ LoadEscapeEncoding(name, chan) dataPtr->prefixBytes[UCHAR(dataPtr->final[0])] = 1; } + /* + * Package everything into an encoding structure. + */ + type.encodingName = name; type.toUtfProc = EscapeToUtfProc; type.fromUtfProc = EscapeFromUtfProc; type.freeProc = EscapeFreeProc; type.nullSize = 1; - type.clientData = (ClientData) dataPtr; + type.clientData = dataPtr; return Tcl_CreateEncoding(&type); } @@ -1982,7 +2060,7 @@ LoadEscapeEncoding(name, chan) * * BinaryProc -- * - * The default conversion when no other conversion is specified. No + * The default conversion when no other conversion is specified. No * translation is done; source bytes are copied directly to destination * bytes. * @@ -1996,27 +2074,26 @@ LoadEscapeEncoding(name, chan) */ static int -BinaryProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, - srcReadPtr, dstWrotePtr, dstCharsPtr) - ClientData clientData; /* Not used. */ - CONST char *src; /* Source string (unknown encoding). */ - int srcLen; /* Source string length in bytes. */ - int flags; /* Conversion control flags. */ - Tcl_EncodingState *statePtr;/* Place for conversion routine to store state +BinaryProc( + ClientData clientData, /* Not used. */ + const char *src, /* Source string (unknown encoding). */ + int srcLen, /* Source string length in bytes. 
*/ + int flags, /* Conversion control flags. */ + Tcl_EncodingState *statePtr,/* Place for conversion routine to store state * information used during a piecewise - * conversion. Contents of statePtr are + * conversion. Contents of statePtr are * initialized and/or reset by conversion * routine under control of flags argument. */ - char *dst; /* Output buffer in which converted string is + char *dst, /* Output buffer in which converted string is * stored. */ - int dstLen; /* The maximum length of output buffer in + int dstLen, /* The maximum length of output buffer in * bytes. */ - int *srcReadPtr; /* Filled with the number of bytes from the + int *srcReadPtr, /* Filled with the number of bytes from the * source string that were converted. */ - int *dstWrotePtr; /* Filled with the number of bytes that were + int *dstWrotePtr, /* Filled with the number of bytes that were * stored in the output buffer as a result of * the conversion. */ - int *dstCharsPtr; /* Filled with the number of characters that + int *dstCharsPtr) /* Filled with the number of characters that * correspond to the bytes stored in the * output buffer. */ { @@ -2035,9 +2112,7 @@ BinaryProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, *srcReadPtr = srcLen; *dstWrotePtr = srcLen; *dstCharsPtr = srcLen; - for ( ; --srcLen >= 0; ) { - *dst++ = *src++; - } + memcpy(dst, src, (size_t) srcLen); return result; } @@ -2060,30 +2135,29 @@ BinaryProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, */ static int -UtfIntToUtfExtProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, - srcReadPtr, dstWrotePtr, dstCharsPtr) - ClientData clientData; /* Not used. */ - CONST char *src; /* Source string in UTF-8. */ - int srcLen; /* Source string length in bytes. */ - int flags; /* Conversion control flags. */ - Tcl_EncodingState *statePtr;/* Place for conversion routine to store state +UtfIntToUtfExtProc( + ClientData clientData, /* Not used. */ + const char *src, /* Source string in UTF-8. 
*/ + int srcLen, /* Source string length in bytes. */ + int flags, /* Conversion control flags. */ + Tcl_EncodingState *statePtr,/* Place for conversion routine to store state * information used during a piecewise - * conversion. Contents of statePtr are + * conversion. Contents of statePtr are * initialized and/or reset by conversion * routine under control of flags argument. */ - char *dst; /* Output buffer in which converted string + char *dst, /* Output buffer in which converted string * is stored. */ - int dstLen; /* The maximum length of output buffer in + int dstLen, /* The maximum length of output buffer in * bytes. */ - int *srcReadPtr; /* Filled with the number of bytes from the - * source string that were converted. This - * may be less than the original source length - * if there was a problem converting some - * source characters. */ - int *dstWrotePtr; /* Filled with the number of bytes that were + int *srcReadPtr, /* Filled with the number of bytes from the + * source string that were converted. This may + * be less than the original source length if + * there was a problem converting some source + * characters. */ + int *dstWrotePtr, /* Filled with the number of bytes that were * stored in the output buffer as a result of * the conversion. */ - int *dstCharsPtr; /* Filled with the number of characters that + int *dstCharsPtr) /* Filled with the number of characters that * correspond to the bytes stored in the * output buffer. */ { @@ -2096,9 +2170,9 @@ UtfIntToUtfExtProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, * * UtfExtToUtfIntProc -- * - * Convert from UTF-8 to UTF-8 while converting null-bytes from - * the official representation (0x00) to Tcl's internal - * representation (0xc0, 0x80). See UtfToUtfProc for details. + * Convert from UTF-8 to UTF-8 while converting null-bytes from the + * official representation (0x00) to Tcl's internal representation (0xc0, + * 0x80). See UtfToUtfProc for details. 
* * Results: * Returns TCL_OK if conversion was successful. @@ -2108,31 +2182,31 @@ UtfIntToUtfExtProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, * *------------------------------------------------------------------------- */ + static int -UtfExtToUtfIntProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, - srcReadPtr, dstWrotePtr, dstCharsPtr) - ClientData clientData; /* Not used. */ - CONST char *src; /* Source string in UTF-8. */ - int srcLen; /* Source string length in bytes. */ - int flags; /* Conversion control flags. */ - Tcl_EncodingState *statePtr;/* Place for conversion routine to store state +UtfExtToUtfIntProc( + ClientData clientData, /* Not used. */ + const char *src, /* Source string in UTF-8. */ + int srcLen, /* Source string length in bytes. */ + int flags, /* Conversion control flags. */ + Tcl_EncodingState *statePtr,/* Place for conversion routine to store state * information used during a piecewise - * conversion. Contents of statePtr are + * conversion. Contents of statePtr are * initialized and/or reset by conversion * routine under control of flags argument. */ - char *dst; /* Output buffer in which converted string is + char *dst, /* Output buffer in which converted string is * stored. */ - int dstLen; /* The maximum length of output buffer in + int dstLen, /* The maximum length of output buffer in * bytes. */ - int *srcReadPtr; /* Filled with the number of bytes from the - * source string that were converted. This - * may be less than the original source length - * if there was a problem converting some - * source characters. */ - int *dstWrotePtr; /* Filled with the number of bytes that were + int *srcReadPtr, /* Filled with the number of bytes from the + * source string that were converted. This may + * be less than the original source length if + * there was a problem converting some source + * characters. 
*/ + int *dstWrotePtr, /* Filled with the number of bytes that were * stored in the output buffer as a result of * the conversion. */ - int *dstCharsPtr; /* Filled with the number of characters that + int *dstCharsPtr) /* Filled with the number of characters that * correspond to the bytes stored in the * output buffer. */ { @@ -2145,7 +2219,7 @@ UtfExtToUtfIntProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, * * UtfToUtfProc -- * - * Convert from UTF-8 to UTF-8. Note that the UTF-8 to UTF-8 translation + * Convert from UTF-8 to UTF-8. Note that the UTF-8 to UTF-8 translation * is not a no-op, because it will turn a stream of improperly formed * UTF-8 into a properly formed stream. * @@ -2159,38 +2233,37 @@ UtfExtToUtfIntProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, */ static int -UtfToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, - srcReadPtr, dstWrotePtr, dstCharsPtr, pureNullMode) - ClientData clientData; /* Not used. */ - CONST char *src; /* Source string in UTF-8. */ - int srcLen; /* Source string length in bytes. */ - int flags; /* Conversion control flags. */ - Tcl_EncodingState *statePtr;/* Place for conversion routine to store state +UtfToUtfProc( + ClientData clientData, /* Not used. */ + const char *src, /* Source string in UTF-8. */ + int srcLen, /* Source string length in bytes. */ + int flags, /* Conversion control flags. */ + Tcl_EncodingState *statePtr,/* Place for conversion routine to store state * information used during a piecewise - * conversion. Contents of statePtr are + * conversion. Contents of statePtr are * initialized and/or reset by conversion * routine under control of flags argument. */ - char *dst; /* Output buffer in which converted string is + char *dst, /* Output buffer in which converted string is * stored. */ - int dstLen; /* The maximum length of output buffer in + int dstLen, /* The maximum length of output buffer in * bytes. 
*/ - int *srcReadPtr; /* Filled with the number of bytes from the + int *srcReadPtr, /* Filled with the number of bytes from the * source string that were converted. This may * be less than the original source length if * there was a problem converting some source * characters. */ - int *dstWrotePtr; /* Filled with the number of bytes that were + int *dstWrotePtr, /* Filled with the number of bytes that were * stored in the output buffer as a result of * the conversion. */ - int *dstCharsPtr; /* Filled with the number of characters that + int *dstCharsPtr, /* Filled with the number of characters that * correspond to the bytes stored in the * output buffer. */ - int pureNullMode; /* Convert embedded nulls from internal + int pureNullMode) /* Convert embedded nulls from internal * representation to real null-bytes or vice * versa. */ { - CONST char *srcStart, *srcEnd, *srcClose; - char *dstStart, *dstEnd; + const char *srcStart, *srcEnd, *srcClose; + const char *dstStart, *dstEnd; int result, numChars; Tcl_UniChar ch; @@ -2235,6 +2308,16 @@ UtfToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, *dst++ = 0; src += 2; + } else if (!Tcl_UtfCharComplete(src, srcEnd - src)) { + /* + * Always check before using Tcl_UtfToUniChar. Not doing can so + * cause it run beyond the endof the buffer! If we happen such an + * incomplete char its byts are made to represent themselves. + */ + + ch = (unsigned char) *src; + src += 1; + dst += Tcl_UniCharToUtf(ch, dst); } else { src += Tcl_UtfToUniChar(src, &ch); dst += Tcl_UniCharToUtf(ch, dst); @@ -2264,36 +2347,36 @@ UtfToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, */ static int -UnicodeToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, - srcReadPtr, dstWrotePtr, dstCharsPtr) - ClientData clientData; /* Not used. */ - CONST char *src; /* Source string in Unicode. */ - int srcLen; /* Source string length in bytes. */ - int flags; /* Conversion control flags. 
*/ - Tcl_EncodingState *statePtr;/* Place for conversion routine to store state +UnicodeToUtfProc( + ClientData clientData, /* Not used. */ + const char *src, /* Source string in Unicode. */ + int srcLen, /* Source string length in bytes. */ + int flags, /* Conversion control flags. */ + Tcl_EncodingState *statePtr,/* Place for conversion routine to store state * information used during a piecewise - * conversion. Contents of statePtr are + * conversion. Contents of statePtr are * initialized and/or reset by conversion * routine under control of flags argument. */ - char *dst; /* Output buffer in which converted string is + char *dst, /* Output buffer in which converted string is * stored. */ - int dstLen; /* The maximum length of output buffer in + int dstLen, /* The maximum length of output buffer in * bytes. */ - int *srcReadPtr; /* Filled with the number of bytes from the - * source string that were converted. This - * may be less than the original source length - * if there was a problem converting some - * source characters. */ - int *dstWrotePtr; /* Filled with the number of bytes that were + int *srcReadPtr, /* Filled with the number of bytes from the + * source string that were converted. This may + * be less than the original source length if + * there was a problem converting some source + * characters. */ + int *dstWrotePtr, /* Filled with the number of bytes that were * stored in the output buffer as a result of * the conversion. */ - int *dstCharsPtr; /* Filled with the number of characters that + int *dstCharsPtr) /* Filled with the number of characters that * correspond to the bytes stored in the * output buffer. 
*/ { - CONST Tcl_UniChar *wSrc, *wSrcStart, *wSrcEnd; - char *dstEnd, *dstStart; + const char *srcStart, *srcEnd; + const char *dstEnd, *dstStart; int result, numChars; + Tcl_UniChar ch; result = TCL_OK; if ((srcLen % sizeof(Tcl_UniChar)) != 0) { @@ -2302,33 +2385,33 @@ UnicodeToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, srcLen *= sizeof(Tcl_UniChar); } - wSrc = (Tcl_UniChar *) src; - - wSrcStart = (Tcl_UniChar *) src; - wSrcEnd = (Tcl_UniChar *) (src + srcLen); + srcStart = src; + srcEnd = src + srcLen; dstStart = dst; dstEnd = dst + dstLen - TCL_UTF_MAX; - for (numChars = 0; wSrc < wSrcEnd; numChars++) { + for (numChars = 0; src < srcEnd; numChars++) { if (dst > dstEnd) { result = TCL_CONVERT_NOSPACE; break; } /* - * Special case for 1-byte utf chars for speed. + * Special case for 1-byte utf chars for speed. Make sure we work with + * Tcl_UniChar-size data. */ - if (*wSrc && *wSrc < 0x80) { - *dst++ = (char) *wSrc; + ch = *(Tcl_UniChar *)src; + if (ch && ch < 0x80) { + *dst++ = (ch & 0xFF); } else { - dst += Tcl_UniCharToUtf(*wSrc, dst); + dst += Tcl_UniCharToUtf(ch, dst); } - wSrc++; + src += sizeof(Tcl_UniChar); } - *srcReadPtr = (char *) wSrc - (char *) wSrcStart; + *srcReadPtr = src - srcStart; *dstWrotePtr = dst - dstStart; *dstCharsPtr = numChars; return result; @@ -2351,37 +2434,36 @@ UnicodeToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, */ static int -UtfToUnicodeProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, - srcReadPtr, dstWrotePtr, dstCharsPtr) - ClientData clientData; /* TableEncodingData that specifies +UtfToUnicodeProc( + ClientData clientData, /* TableEncodingData that specifies * encoding. */ - CONST char *src; /* Source string in UTF-8. */ - int srcLen; /* Source string length in bytes. */ - int flags; /* Conversion control flags. */ - Tcl_EncodingState *statePtr;/* Place for conversion routine to store state + const char *src, /* Source string in UTF-8. 
*/ + int srcLen, /* Source string length in bytes. */ + int flags, /* Conversion control flags. */ + Tcl_EncodingState *statePtr,/* Place for conversion routine to store state * information used during a piecewise - * conversion. Contents of statePtr are + * conversion. Contents of statePtr are * initialized and/or reset by conversion * routine under control of flags argument. */ - char *dst; /* Output buffer in which converted string is + char *dst, /* Output buffer in which converted string is * stored. */ - int dstLen; /* The maximum length of output buffer in + int dstLen, /* The maximum length of output buffer in * bytes. */ - int *srcReadPtr; /* Filled with the number of bytes from the - * source string that were converted. This - * may be less than the original source length - * if there was a problem converting some - * source characters. */ - int *dstWrotePtr; /* Filled with the number of bytes that were + int *srcReadPtr, /* Filled with the number of bytes from the + * source string that were converted. This may + * be less than the original source length if + * there was a problem converting some source + * characters. */ + int *dstWrotePtr, /* Filled with the number of bytes that were * stored in the output buffer as a result of * the conversion. */ - int *dstCharsPtr; /* Filled with the number of characters that + int *dstCharsPtr) /* Filled with the number of characters that * correspond to the bytes stored in the * output buffer. 
*/ { - CONST char *srcStart, *srcEnd, *srcClose; - Tcl_UniChar *wDst, *wDstStart, *wDstEnd; + const char *srcStart, *srcEnd, *srcClose, *dstStart, *dstEnd; int result, numChars; + Tcl_UniChar ch; srcStart = src; srcEnd = src + srcLen; @@ -2390,9 +2472,8 @@ UtfToUnicodeProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, srcClose -= TCL_UTF_MAX; } - wDst = (Tcl_UniChar *) dst; - wDstStart = (Tcl_UniChar *) dst; - wDstEnd = (Tcl_UniChar *) (dst + dstLen - sizeof(Tcl_UniChar)); + dstStart = dst; + dstEnd = dst + dstLen - sizeof(Tcl_UniChar); result = TCL_OK; for (numChars = 0; src < srcEnd; numChars++) { @@ -2405,16 +2486,28 @@ UtfToUnicodeProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, result = TCL_CONVERT_MULTIBYTE; break; } - if (wDst > wDstEnd) { + if (dst > dstEnd) { result = TCL_CONVERT_NOSPACE; break; - } - src += TclUtfToUniChar(src, wDst); - wDst++; - } + } + src += TclUtfToUniChar(src, &ch); + + /* + * Need to handle this in a way that won't cause misalignment by + * casting dst to a Tcl_UniChar. [Bug 1122671] + * XXX: This hard-codes the assumed size of Tcl_UniChar as 2. + */ +#ifdef WORDS_BIGENDIAN + *dst++ = (ch >> 8); + *dst++ = (ch & 0xFF); +#else + *dst++ = (ch & 0xFF); + *dst++ = (ch >> 8); +#endif + } *srcReadPtr = src - srcStart; - *dstWrotePtr = (char *) wDst - (char *) wDstStart; + *dstWrotePtr = dst - dstStart; *dstCharsPtr = numChars; return result; } @@ -2437,41 +2530,40 @@ UtfToUnicodeProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, */ static int -TableToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, - srcReadPtr, dstWrotePtr, dstCharsPtr) - ClientData clientData; /* TableEncodingData that specifies +TableToUtfProc( + ClientData clientData, /* TableEncodingData that specifies * encoding. */ - CONST char *src; /* Source string in specified encoding. */ - int srcLen; /* Source string length in bytes. */ - int flags; /* Conversion control flags. 
*/ - Tcl_EncodingState *statePtr;/* Place for conversion routine to store state + const char *src, /* Source string in specified encoding. */ + int srcLen, /* Source string length in bytes. */ + int flags, /* Conversion control flags. */ + Tcl_EncodingState *statePtr,/* Place for conversion routine to store state * information used during a piecewise - * conversion. Contents of statePtr are + * conversion. Contents of statePtr are * initialized and/or reset by conversion * routine under control of flags argument. */ - char *dst; /* Output buffer in which converted string is + char *dst, /* Output buffer in which converted string is * stored. */ - int dstLen; /* The maximum length of output buffer in + int dstLen, /* The maximum length of output buffer in * bytes. */ - int *srcReadPtr; /* Filled with the number of bytes from the - * source string that were converted. This - * may be less than the original source length - * if there was a problem converting some - * source characters. */ - int *dstWrotePtr; /* Filled with the number of bytes that were + int *srcReadPtr, /* Filled with the number of bytes from the + * source string that were converted. This may + * be less than the original source length if + * there was a problem converting some source + * characters. */ + int *dstWrotePtr, /* Filled with the number of bytes that were * stored in the output buffer as a result of * the conversion. */ - int *dstCharsPtr; /* Filled with the number of characters that + int *dstCharsPtr) /* Filled with the number of characters that * correspond to the bytes stored in the * output buffer. 
*/ { - CONST char *srcStart, *srcEnd; - char *dstEnd, *dstStart, *prefixBytes; + const char *srcStart, *srcEnd; + const char *dstEnd, *dstStart, *prefixBytes; int result, byte, numChars; Tcl_UniChar ch; - unsigned short **toUnicode; - unsigned short *pageZero; - TableEncodingData *dataPtr; + const unsigned short *const *toUnicode; + const unsigned short *pageZero; + TableEncodingData *dataPtr = clientData; srcStart = src; srcEnd = src + srcLen; @@ -2479,8 +2571,7 @@ TableToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, dstStart = dst; dstEnd = dst + dstLen - TCL_UTF_MAX; - dataPtr = (TableEncodingData *) clientData; - toUnicode = dataPtr->toUnicode; + toUnicode = (const unsigned short *const *) dataPtr->toUnicode; prefixBytes = dataPtr->prefixBytes; pageZero = toUnicode[0]; @@ -2512,9 +2603,11 @@ TableToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, } ch = (Tcl_UniChar) byte; } + /* * Special case for 1-byte utf chars for speed. */ + if (ch && ch < 0x80) { *dst++ = (char) ch; } else { @@ -2547,46 +2640,44 @@ TableToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, */ static int -TableFromUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, - srcReadPtr, dstWrotePtr, dstCharsPtr) - ClientData clientData; /* TableEncodingData that specifies +TableFromUtfProc( + ClientData clientData, /* TableEncodingData that specifies * encoding. */ - CONST char *src; /* Source string in UTF-8. */ - int srcLen; /* Source string length in bytes. */ - int flags; /* Conversion control flags. */ - Tcl_EncodingState *statePtr;/* Place for conversion routine to store state + const char *src, /* Source string in UTF-8. */ + int srcLen, /* Source string length in bytes. */ + int flags, /* Conversion control flags. */ + Tcl_EncodingState *statePtr,/* Place for conversion routine to store state * information used during a piecewise - * conversion. Contents of statePtr are + * conversion. 
Contents of statePtr are * initialized and/or reset by conversion * routine under control of flags argument. */ - char *dst; /* Output buffer in which converted string is + char *dst, /* Output buffer in which converted string is * stored. */ - int dstLen; /* The maximum length of output buffer in + int dstLen, /* The maximum length of output buffer in * bytes. */ - int *srcReadPtr; /* Filled with the number of bytes from the - * source string that were converted. This - * may be less than the original source length - * if there was a problem converting some - * source characters. */ - int *dstWrotePtr; /* Filled with the number of bytes that were + int *srcReadPtr, /* Filled with the number of bytes from the + * source string that were converted. This may + * be less than the original source length if + * there was a problem converting some source + * characters. */ + int *dstWrotePtr, /* Filled with the number of bytes that were * stored in the output buffer as a result of * the conversion. */ - int *dstCharsPtr; /* Filled with the number of characters that + int *dstCharsPtr) /* Filled with the number of characters that * correspond to the bytes stored in the * output buffer. */ { - CONST char *srcStart, *srcEnd, *srcClose; - char *dstStart, *dstEnd, *prefixBytes; + const char *srcStart, *srcEnd, *srcClose; + const char *dstStart, *dstEnd, *prefixBytes; Tcl_UniChar ch; int result, len, word, numChars; - TableEncodingData *dataPtr; - unsigned short **fromUnicode; + TableEncodingData *dataPtr = clientData; + const unsigned short *const *fromUnicode; result = TCL_OK; - dataPtr = (TableEncodingData *) clientData; prefixBytes = dataPtr->prefixBytes; - fromUnicode = dataPtr->fromUnicode; + fromUnicode = (const unsigned short *const *) dataPtr->fromUnicode; srcStart = src; srcEnd = src + srcLen; @@ -2612,7 +2703,7 @@ TableFromUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, #if TCL_UTF_MAX > 3 /* - * This prevents a crash condition. 
More evaluation is required for + * This prevents a crash condition. More evaluation is required for * full support of int Tcl_UniChar. [Bug 1004065] */ @@ -2655,11 +2746,197 @@ TableFromUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, } /* + *------------------------------------------------------------------------- + * + * Iso88591ToUtfProc -- + * + * Convert from the "iso8859-1" encoding into UTF-8. + * + * Results: + * Returns TCL_OK if conversion was successful. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------- + */ + +static int +Iso88591ToUtfProc( + ClientData clientData, /* Ignored. */ + const char *src, /* Source string in specified encoding. */ + int srcLen, /* Source string length in bytes. */ + int flags, /* Conversion control flags. */ + Tcl_EncodingState *statePtr,/* Place for conversion routine to store state + * information used during a piecewise + * conversion. Contents of statePtr are + * initialized and/or reset by conversion + * routine under control of flags argument. */ + char *dst, /* Output buffer in which converted string is + * stored. */ + int dstLen, /* The maximum length of output buffer in + * bytes. */ + int *srcReadPtr, /* Filled with the number of bytes from the + * source string that were converted. This may + * be less than the original source length if + * there was a problem converting some source + * characters. */ + int *dstWrotePtr, /* Filled with the number of bytes that were + * stored in the output buffer as a result of + * the conversion. */ + int *dstCharsPtr) /* Filled with the number of characters that + * correspond to the bytes stored in the + * output buffer. 
*/ +{ + const char *srcStart, *srcEnd; + const char *dstEnd, *dstStart; + int result, numChars; + + srcStart = src; + srcEnd = src + srcLen; + + dstStart = dst; + dstEnd = dst + dstLen - TCL_UTF_MAX; + + result = TCL_OK; + for (numChars = 0; src < srcEnd; numChars++) { + Tcl_UniChar ch; + + if (dst > dstEnd) { + result = TCL_CONVERT_NOSPACE; + break; + } + ch = (Tcl_UniChar) *((unsigned char *) src); + + /* + * Special case for 1-byte utf chars for speed. + */ + + if (ch && ch < 0x80) { + *dst++ = (char) ch; + } else { + dst += Tcl_UniCharToUtf(ch, dst); + } + src++; + } + + *srcReadPtr = src - srcStart; + *dstWrotePtr = dst - dstStart; + *dstCharsPtr = numChars; + return result; +} + +/* + *------------------------------------------------------------------------- + * + * Iso88591FromUtfProc -- + * + * Convert from UTF-8 into the encoding "iso8859-1". + * + * Results: + * Returns TCL_OK if conversion was successful. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------- + */ + +static int +Iso88591FromUtfProc( + ClientData clientData, /* Ignored. */ + const char *src, /* Source string in UTF-8. */ + int srcLen, /* Source string length in bytes. */ + int flags, /* Conversion control flags. */ + Tcl_EncodingState *statePtr,/* Place for conversion routine to store state + * information used during a piecewise + * conversion. Contents of statePtr are + * initialized and/or reset by conversion + * routine under control of flags argument. */ + char *dst, /* Output buffer in which converted string is + * stored. */ + int dstLen, /* The maximum length of output buffer in + * bytes. */ + int *srcReadPtr, /* Filled with the number of bytes from the + * source string that were converted. This may + * be less than the original source length if + * there was a problem converting some source + * characters. 
*/ + int *dstWrotePtr, /* Filled with the number of bytes that were + * stored in the output buffer as a result of + * the conversion. */ + int *dstCharsPtr) /* Filled with the number of characters that + * correspond to the bytes stored in the + * output buffer. */ +{ + const char *srcStart, *srcEnd, *srcClose; + const char *dstStart, *dstEnd; + int result, numChars; + + result = TCL_OK; + + srcStart = src; + srcEnd = src + srcLen; + srcClose = srcEnd; + if ((flags & TCL_ENCODING_END) == 0) { + srcClose -= TCL_UTF_MAX; + } + + dstStart = dst; + dstEnd = dst + dstLen - 1; + + for (numChars = 0; src < srcEnd; numChars++) { + Tcl_UniChar ch; + int len; + + if ((src > srcClose) && (!Tcl_UtfCharComplete(src, srcEnd - src))) { + /* + * If there is more string to follow, this will ensure that the + * last UTF-8 character in the source buffer hasn't been cut off. + */ + + result = TCL_CONVERT_MULTIBYTE; + break; + } + len = TclUtfToUniChar(src, &ch); + + /* + * Check for illegal characters. + */ + + if (ch > 0xff) { + if (flags & TCL_ENCODING_STOPONERROR) { + result = TCL_CONVERT_UNKNOWN; + break; + } + + /* + * Plunge on, using '?' as a fallback character. + */ + + ch = (Tcl_UniChar) '?'; + } + + if (dst > dstEnd) { + result = TCL_CONVERT_NOSPACE; + break; + } + *(dst++) = (char) ch; + src += len; + } + + *srcReadPtr = src - srcStart; + *dstWrotePtr = dst - dstStart; + *dstCharsPtr = numChars; + return result; +} + +/* *--------------------------------------------------------------------------- * * TableFreeProc -- * - * This procedure is invoked when an encoding is deleted. It deletes the + * This function is invoked when an encoding is deleted. It deletes the * memory used by the TableEncodingData. 
* * Results: @@ -2672,20 +2949,19 @@ TableFromUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, */ static void -TableFreeProc(clientData) - ClientData clientData; /* TableEncodingData that specifies +TableFreeProc( + ClientData clientData) /* TableEncodingData that specifies * encoding. */ { - TableEncodingData *dataPtr; + TableEncodingData *dataPtr = clientData; /* - * Make sure we aren't freeing twice on shutdown. [Bug #219314] + * Make sure we aren't freeing twice on shutdown. [Bug 219314] */ - dataPtr = (TableEncodingData *) clientData; - ckfree((char *) dataPtr->toUnicode); - ckfree((char *) dataPtr->fromUnicode); - ckfree((char *) dataPtr); + ckfree(dataPtr->toUnicode); + ckfree(dataPtr->fromUnicode); + ckfree(dataPtr); } /* @@ -2706,48 +2982,43 @@ TableFreeProc(clientData) */ static int -EscapeToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, - srcReadPtr, dstWrotePtr, dstCharsPtr) - ClientData clientData; /* EscapeEncodingData that specifies +EscapeToUtfProc( + ClientData clientData, /* EscapeEncodingData that specifies * encoding. */ - CONST char *src; /* Source string in specified encoding. */ - int srcLen; /* Source string length in bytes. */ - int flags; /* Conversion control flags. */ - Tcl_EncodingState *statePtr;/* Place for conversion routine to store state + const char *src, /* Source string in specified encoding. */ + int srcLen, /* Source string length in bytes. */ + int flags, /* Conversion control flags. */ + Tcl_EncodingState *statePtr,/* Place for conversion routine to store state * information used during a piecewise - * conversion. Contents of statePtr are + * conversion. Contents of statePtr are * initialized and/or reset by conversion * routine under control of flags argument. */ - char *dst; /* Output buffer in which converted string is + char *dst, /* Output buffer in which converted string is * stored. 
*/ - int dstLen; /* The maximum length of output buffer in + int dstLen, /* The maximum length of output buffer in * bytes. */ - int *srcReadPtr; /* Filled with the number of bytes from the - * source string that were converted. This - * may be less than the original source length - * if there was a problem converting some - * source characters. */ - int *dstWrotePtr; /* Filled with the number of bytes that were + int *srcReadPtr, /* Filled with the number of bytes from the + * source string that were converted. This may + * be less than the original source length if + * there was a problem converting some source + * characters. */ + int *dstWrotePtr, /* Filled with the number of bytes that were * stored in the output buffer as a result of * the conversion. */ - int *dstCharsPtr; /* Filled with the number of characters that + int *dstCharsPtr) /* Filled with the number of characters that * correspond to the bytes stored in the * output buffer. */ { - EscapeEncodingData *dataPtr; - char *prefixBytes, *tablePrefixBytes; - unsigned short **tableToUnicode; - Encoding *encodingPtr; + EscapeEncodingData *dataPtr = clientData; + const char *prefixBytes, *tablePrefixBytes, *srcStart, *srcEnd; + const unsigned short *const *tableToUnicode; + const Encoding *encodingPtr; int state, result, numChars; - CONST char *srcStart, *srcEnd; - char *dstStart, *dstEnd; + const char *dstStart, *dstEnd; result = TCL_OK; - tablePrefixBytes = NULL; /* lint. */ tableToUnicode = NULL; /* lint. 
*/ - - dataPtr = (EscapeEncodingData *) clientData; prefixBytes = dataPtr->prefixBytes; encodingPtr = NULL; @@ -2757,7 +3028,7 @@ EscapeToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, dstStart = dst; dstEnd = dst + dstLen - TCL_UTF_MAX; - state = (int) *statePtr; + state = PTR2INT(*statePtr); if (flags & TCL_ENCODING_START) { state = 0; } @@ -2771,9 +3042,9 @@ EscapeToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, } byte = *((unsigned char *) src); if (prefixBytes[byte]) { - unsigned int left, len, longest; + unsigned left, len, longest; int checked, i; - EscapeSubTable *subTablePtr; + const EscapeSubTable *subTablePtr; /* * Saw the beginning of an escape sequence. @@ -2845,7 +3116,7 @@ EscapeToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, } /* - * We have a split-up or unrecognized escape sequence. If we + * We have a split-up or unrecognized escape sequence. If we * checked all the sequences, then it's a syntax error, otherwise * we need more bytes to determine a match. 
*/ @@ -2871,9 +3142,10 @@ EscapeToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, TableEncodingData *tableDataPtr; encodingPtr = GetTableEncoding(dataPtr, state); - tableDataPtr = (TableEncodingData *) encodingPtr->clientData; + tableDataPtr = encodingPtr->clientData; tablePrefixBytes = tableDataPtr->prefixBytes; - tableToUnicode = tableDataPtr->toUnicode; + tableToUnicode = (const unsigned short *const*) + tableDataPtr->toUnicode; } if (tablePrefixBytes[byte]) { @@ -2896,7 +3168,7 @@ EscapeToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, numChars++; } - *statePtr = (Tcl_EncodingState) state; + *statePtr = (Tcl_EncodingState) INT2PTR(state); *srcReadPtr = src - srcStart; *dstWrotePtr = dst - dstStart; *dstCharsPtr = numChars; @@ -2921,47 +3193,44 @@ EscapeToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, */ static int -EscapeFromUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, - srcReadPtr, dstWrotePtr, dstCharsPtr) - ClientData clientData; /* EscapeEncodingData that specifies +EscapeFromUtfProc( + ClientData clientData, /* EscapeEncodingData that specifies * encoding. */ - CONST char *src; /* Source string in UTF-8. */ - int srcLen; /* Source string length in bytes. */ - int flags; /* Conversion control flags. */ - Tcl_EncodingState *statePtr;/* Place for conversion routine to store state + const char *src, /* Source string in UTF-8. */ + int srcLen, /* Source string length in bytes. */ + int flags, /* Conversion control flags. */ + Tcl_EncodingState *statePtr,/* Place for conversion routine to store state * information used during a piecewise - * conversion. Contents of statePtr are + * conversion. Contents of statePtr are * initialized and/or reset by conversion * routine under control of flags argument. */ - char *dst; /* Output buffer in which converted string is + char *dst, /* Output buffer in which converted string is * stored. 
*/ - int dstLen; /* The maximum length of output buffer in + int dstLen, /* The maximum length of output buffer in * bytes. */ - int *srcReadPtr; /* Filled with the number of bytes from the + int *srcReadPtr, /* Filled with the number of bytes from the * source string that were converted. This may * be less than the original source length if * there was a problem converting some source * characters. */ - int *dstWrotePtr; /* Filled with the number of bytes that were + int *dstWrotePtr, /* Filled with the number of bytes that were * stored in the output buffer as a result of * the conversion. */ - int *dstCharsPtr; /* Filled with the number of characters that + int *dstCharsPtr) /* Filled with the number of characters that * correspond to the bytes stored in the * output buffer. */ { - EscapeEncodingData *dataPtr; - Encoding *encodingPtr; - CONST char *srcStart, *srcEnd, *srcClose; - char *dstStart, *dstEnd; + EscapeEncodingData *dataPtr = clientData; + const Encoding *encodingPtr; + const char *srcStart, *srcEnd, *srcClose; + const char *dstStart, *dstEnd; int state, result, numChars; - TableEncodingData *tableDataPtr; - char *tablePrefixBytes; - unsigned short **tableFromUnicode; + const TableEncodingData *tableDataPtr; + const char *tablePrefixBytes; + const unsigned short *const *tableFromUnicode; result = TCL_OK; - dataPtr = (EscapeEncodingData *) clientData; - srcStart = src; srcEnd = src + srcLen; srcClose = srcEnd; @@ -2973,30 +3242,31 @@ EscapeFromUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, dstEnd = dst + dstLen - 1; /* - * RFC1468 states that the text starts in ASCII, and switches to Japanese - * characters, and that the text must end in ASCII. [Patch #474358] + * RFC 1468 states that the text starts in ASCII, and switches to Japanese + * characters, and that the text must end in ASCII. 
[Patch 474358] */ if (flags & TCL_ENCODING_START) { state = 0; - if (dst + dataPtr->initLen > dstEnd) { + if ((dst + dataPtr->initLen) > dstEnd) { *srcReadPtr = 0; *dstWrotePtr = 0; return TCL_CONVERT_NOSPACE; } - memcpy((VOID *)dst, (VOID *)dataPtr->init, (size_t)dataPtr->initLen); + memcpy(dst, dataPtr->init, (size_t)dataPtr->initLen); dst += dataPtr->initLen; } else { - state = (int) *statePtr; + state = PTR2INT(*statePtr); } encodingPtr = GetTableEncoding(dataPtr, state); - tableDataPtr = (TableEncodingData *) encodingPtr->clientData; + tableDataPtr = encodingPtr->clientData; tablePrefixBytes = tableDataPtr->prefixBytes; - tableFromUnicode = tableDataPtr->fromUnicode; + tableFromUnicode = (const unsigned short *const *) + tableDataPtr->fromUnicode; for (numChars = 0; src < srcEnd; numChars++) { - unsigned int len; + unsigned len; int word; Tcl_UniChar ch; @@ -3014,13 +3284,13 @@ EscapeFromUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, if ((word == 0) && (ch != 0)) { int oldState; - EscapeSubTable *subTablePtr; + const EscapeSubTable *subTablePtr; oldState = state; for (state = 0; state < dataPtr->numSubTables; state++) { encodingPtr = GetTableEncoding(dataPtr, state); - tableDataPtr = (TableEncodingData *) encodingPtr->clientData; - word = tableDataPtr->fromUnicode[(ch >> 8)][ch & 0xff]; + tableDataPtr = encodingPtr->clientData; + word = tableDataPtr->fromUnicode[(ch >> 8)][ch & 0xff]; if (word != 0) { break; } @@ -3033,12 +3303,13 @@ EscapeFromUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, break; } encodingPtr = GetTableEncoding(dataPtr, state); - tableDataPtr = (TableEncodingData *) encodingPtr->clientData; + tableDataPtr = encodingPtr->clientData; word = tableDataPtr->fallback; } - tablePrefixBytes = tableDataPtr->prefixBytes; - tableFromUnicode = tableDataPtr->fromUnicode; + tablePrefixBytes = (const char *) tableDataPtr->prefixBytes; + tableFromUnicode = (const unsigned short *const *) + tableDataPtr->fromUnicode; /* * The 
state variable has the value of oldState when word is 0. @@ -3060,7 +3331,7 @@ EscapeFromUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, result = TCL_CONVERT_NOSPACE; break; } - memcpy((VOID *) dst, (VOID *) subTablePtr->sequence, + memcpy(dst, subTablePtr->sequence, (size_t) subTablePtr->sequenceLen); dst += subTablePtr->sequenceLen; } @@ -3086,22 +3357,31 @@ EscapeFromUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, } if ((result == TCL_OK) && (flags & TCL_ENCODING_END)) { - unsigned int len = dataPtr->subTables[0].sequenceLen; - if (dst + dataPtr->finalLen + (state?len:0) > dstEnd) { + unsigned len = dataPtr->subTables[0].sequenceLen; + + /* + * Certain encodings like iso2022-jp need to write an escape sequence + * after all characters have been converted. This logic checks that + * enough room is available in the buffer for the escape bytes. The + * TCL_ENCODING_END flag is cleared after a final escape sequence has + * been added to the buffer so that another call to this method does + * not attempt to append escape bytes a second time. + */ + + if ((dst + dataPtr->finalLen + (state?len:0)) > dstEnd) { result = TCL_CONVERT_NOSPACE; } else { if (state) { - memcpy((VOID *) dst, (VOID *) dataPtr->subTables[0].sequence, - (size_t) len); + memcpy(dst, dataPtr->subTables[0].sequence, len); dst += len; } - memcpy((VOID *) dst, (VOID *) dataPtr->final, - (size_t) dataPtr->finalLen); + memcpy(dst, dataPtr->final, (size_t) dataPtr->finalLen); dst += dataPtr->finalLen; + state &= ~TCL_ENCODING_END; } } - *statePtr = (Tcl_EncodingState) state; + *statePtr = (Tcl_EncodingState) INT2PTR(state); *srcReadPtr = src - srcStart; *dstWrotePtr = dst - dstStart; *dstCharsPtr = numChars; @@ -3113,8 +3393,8 @@ EscapeFromUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, * * EscapeFreeProc -- * - * This procedure is invoked when an EscapeEncodingData encoding is - * deleted. It deletes the memory used by the encoding. 
+ * This function is invoked when an EscapeEncodingData encoding is + * deleted. It deletes the memory used by the encoding. * * Results: * None. @@ -3126,23 +3406,37 @@ EscapeFromUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, */ static void -EscapeFreeProc(clientData) - ClientData clientData; /* EscapeEncodingData that specifies encoding. */ +EscapeFreeProc( + ClientData clientData) /* EscapeEncodingData that specifies + * encoding. */ { - EscapeEncodingData *dataPtr; + EscapeEncodingData *dataPtr = clientData; EscapeSubTable *subTablePtr; int i; - dataPtr = (EscapeEncodingData *) clientData; if (dataPtr == NULL) { return; } - subTablePtr = dataPtr->subTables; - for (i = 0; i < dataPtr->numSubTables; i++) { - FreeEncoding((Tcl_Encoding) subTablePtr->encodingPtr); - subTablePtr++; + + /* + * The subTables should be freed recursively in normal operation but not + * during TclFinalizeEncodingSubsystem because they are also present as a + * weak reference in the toplevel encodingTable (i.e., they don't have a + * +1 refcount for this), and unpredictable nuking order could remove them + * from under the following loop's feet. [Bug 2891556] + * + * The encodingsInitialized flag, being reset on entry to TFES, can serve + * as a "not in finalization" test. + */ + + if (encodingsInitialized) { + subTablePtr = dataPtr->subTables; + for (i = 0; i < dataPtr->numSubTables; i++) { + FreeEncoding((Tcl_Encoding) subTablePtr->encodingPtr); + subTablePtr++; + } } - ckfree((char *) dataPtr); + ckfree(dataPtr); } /* @@ -3150,7 +3444,7 @@ EscapeFreeProc(clientData) * * GetTableEncoding -- * - * Helper function for the EscapeEncodingData conversions. Gets the + * Helper function for the EscapeEncodingData conversions. Gets the * encoding (of type TextEncodingData) that represents the specified * state. * @@ -3166,20 +3460,18 @@ EscapeFreeProc(clientData) */ static Encoding * -GetTableEncoding(dataPtr, state) - EscapeEncodingData *dataPtr;/* Contains names of encodings. 
*/ - int state; /* Index in dataPtr of desired Encoding. */ +GetTableEncoding( + EscapeEncodingData *dataPtr,/* Contains names of encodings. */ + int state) /* Index in dataPtr of desired Encoding. */ { - EscapeSubTable *subTablePtr; - Encoding *encodingPtr; - - subTablePtr = &dataPtr->subTables[state]; - encodingPtr = subTablePtr->encodingPtr; + EscapeSubTable *subTablePtr = &dataPtr->subTables[state]; + Encoding *encodingPtr = subTablePtr->encodingPtr; if (encodingPtr == NULL) { encodingPtr = (Encoding *) Tcl_GetEncoding(NULL, subTablePtr->name); if ((encodingPtr == NULL) - || (encodingPtr->toUtfProc != TableToUtfProc)) { + || (encodingPtr->toUtfProc != TableToUtfProc + && encodingPtr->toUtfProc != Iso88591ToUtfProc)) { Tcl_Panic("EscapeToUtfProc: invalid sub table"); } subTablePtr->encodingPtr = encodingPtr; @@ -3193,7 +3485,7 @@ GetTableEncoding(dataPtr, state) * * unilen -- * - * A helper function for the Tcl_ExternalToUtf functions. This function + * A helper function for the Tcl_ExternalToUtf functions. This function * is similar to strlen for double-byte characters: it returns the number * of bytes in a 0x0000 terminated string. * @@ -3207,8 +3499,8 @@ GetTableEncoding(dataPtr, state) */ static size_t -unilen(src) - CONST char *src; +unilen( + const char *src) { unsigned short *p; @@ -3226,7 +3518,7 @@ unilen(src) * * This is the fallback routine that sets the default value of the * encoding search path if the application has not set one via a call to - * TclSetEncodingSearchPath() by the first time the search path is needed + * Tcl_SetEncodingSearchPath() by the first time the search path is needed * to load encoding data. 
* * The default encoding search path is produced by taking each directory @@ -3243,47 +3535,48 @@ unilen(src) */ static void -InitializeEncodingSearchPath(valuePtr, lengthPtr, encodingPtr) - char **valuePtr; - int *lengthPtr; - Tcl_Encoding *encodingPtr; +InitializeEncodingSearchPath( + char **valuePtr, + int *lengthPtr, + Tcl_Encoding *encodingPtr) { - char *bytes; + const char *bytes; int i, numDirs, numBytes; - Tcl_Obj *libPath, *encodingObj = Tcl_NewStringObj("encoding", -1); - Tcl_Obj *searchPath = Tcl_NewObj(); + Tcl_Obj *libPathObj, *encodingObj, *searchPathObj; + TclNewLiteralStringObj(encodingObj, "encoding"); + TclNewObj(searchPathObj); Tcl_IncrRefCount(encodingObj); - Tcl_IncrRefCount(searchPath); - libPath = TclGetLibraryPath(); - Tcl_IncrRefCount(libPath); - Tcl_ListObjLength(NULL, libPath, &numDirs); + Tcl_IncrRefCount(searchPathObj); + libPathObj = TclGetLibraryPath(); + Tcl_IncrRefCount(libPathObj); + Tcl_ListObjLength(NULL, libPathObj, &numDirs); for (i = 0; i < numDirs; i++) { - Tcl_Obj *directory, *path; + Tcl_Obj *directoryObj, *pathObj; Tcl_StatBuf stat; - Tcl_ListObjIndex(NULL, libPath, i, &directory); - path = Tcl_FSJoinToPath(directory, 1, &encodingObj); - Tcl_IncrRefCount(path); - if ((0 == Tcl_FSStat(path, &stat)) && S_ISDIR(stat.st_mode)) { - Tcl_ListObjAppendElement(NULL, searchPath, path); + Tcl_ListObjIndex(NULL, libPathObj, i, &directoryObj); + pathObj = Tcl_FSJoinToPath(directoryObj, 1, &encodingObj); + Tcl_IncrRefCount(pathObj); + if ((0 == Tcl_FSStat(pathObj, &stat)) && S_ISDIR(stat.st_mode)) { + Tcl_ListObjAppendElement(NULL, searchPathObj, pathObj); } - Tcl_DecrRefCount(path); + Tcl_DecrRefCount(pathObj); } - Tcl_DecrRefCount(libPath); + Tcl_DecrRefCount(libPathObj); Tcl_DecrRefCount(encodingObj); *encodingPtr = libraryPath.encoding; if (*encodingPtr) { ((Encoding *)(*encodingPtr))->refCount++; } - bytes = Tcl_GetStringFromObj(searchPath, &numBytes); + bytes = Tcl_GetStringFromObj(searchPathObj, &numBytes); *lengthPtr = numBytes; 
- *valuePtr = ckalloc((unsigned int) numBytes + 1); - memcpy((VOID *) *valuePtr, (VOID *) bytes, (size_t) numBytes + 1); - Tcl_DecrRefCount(searchPath); + *valuePtr = ckalloc(numBytes + 1); + memcpy(*valuePtr, bytes, (size_t) numBytes + 1); + Tcl_DecrRefCount(searchPathObj); } /* |