summaryrefslogtreecommitdiffstats
path: root/generic/tclEncoding.c
diff options
context:
space:
mode:
Diffstat (limited to 'generic/tclEncoding.c')
-rw-r--r--generic/tclEncoding.c967
1 files changed, 520 insertions, 447 deletions
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c
index 646713d..8108771 100644
--- a/generic/tclEncoding.c
+++ b/generic/tclEncoding.c
@@ -8,7 +8,7 @@
* See the file "license.terms" for information on usage and redistribution
* of this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclEncoding.c,v 1.35 2005/05/10 18:34:34 kennykb Exp $
+ * RCS: @(#) $Id: tclEncoding.c,v 1.36 2005/07/17 21:17:40 dkf Exp $
*/
#include "tclInt.h"
@@ -16,8 +16,8 @@
typedef size_t (LengthProc)_ANSI_ARGS_((CONST char *src));
/*
- * The following data structure represents an encoding, which describes how
- * to convert between various character sets and UTF-8.
+ * The following data structure represents an encoding, which describes how to
+ * convert between various character sets and UTF-8.
*/
typedef struct Encoding {
@@ -28,8 +28,8 @@ typedef struct Encoding {
* Tcl_EncodingType structure may not be
* persistent. */
Tcl_EncodingConvertProc *toUtfProc;
- /* Procedure to convert from external
- * encoding into UTF-8. */
+ /* Procedure to convert from external encoding
+ * into UTF-8. */
Tcl_EncodingConvertProc *fromUtfProc;
/* Procedure to convert from UTF-8 into
* external encoding. */
@@ -61,9 +61,9 @@ typedef struct Encoding {
*/
typedef struct TableEncodingData {
- int fallback; /* Character (in this encoding) to
- * substitute when this encoding cannot
- * represent a UTF-8 character. */
+ int fallback; /* Character (in this encoding) to substitute
+ * when this encoding cannot represent a UTF-8
+ * character. */
char prefixBytes[256]; /* If a byte in the input stream is a lead
* byte for a 2-byte sequence, the
* corresponding entry in this array is 1,
@@ -73,7 +73,8 @@ typedef struct TableEncodingData {
* Each element of the toUnicode array points
* to an array of 256 shorts. If there is no
* corresponding character in Unicode, the
- * value in the matrix is 0x0000. malloc'd. */
+ * value in the matrix is 0x0000.
+ * malloc'd. */
unsigned short **fromUnicode;
/* Two dimensional sparse matrix to map
* characters from Unicode to the encoding.
@@ -86,11 +87,11 @@ typedef struct TableEncodingData {
/*
* The following structures is the clientData for a dynamically-loaded,
- * escape-driven encoding that is itself comprised of other simpler
- * encodings. An example is "iso-2022-jp", which uses escape sequences to
- * switch between ascii, jis0208, jis0212, gb2312, and ksc5601. Note that
- * "escape-driven" does not necessarily mean that the ESCAPE character is
- * the character used for switching character sets.
+ * escape-driven encoding that is itself comprised of other simpler encodings.
+ * An example is "iso-2022-jp", which uses escape sequences to switch between
+ * ascii, jis0208, jis0212, gb2312, and ksc5601. Note that "escape-driven"
+ * does not necessarily mean that the ESCAPE character is the character used
+ * for switching character sets.
*/
typedef struct EscapeSubTable {
@@ -103,25 +104,25 @@ typedef struct EscapeSubTable {
} EscapeSubTable;
typedef struct EscapeEncodingData {
- int fallback; /* Character (in this encoding) to
- * substitute when this encoding cannot
- * represent a UTF-8 character. */
+ int fallback; /* Character (in this encoding) to substitute
+ * when this encoding cannot represent a UTF-8
+ * character. */
unsigned int initLen; /* Length of following string. */
char init[16]; /* String to emit or expect before first char
* in conversion. */
unsigned int finalLen; /* Length of following string. */
- char final[16]; /* String to emit or expect after last char
- * in conversion. */
- char prefixBytes[256]; /* If a byte in the input stream is the
- * first character of one of the escape
- * sequences in the following array, the
- * corresponding entry in this array is 1,
- * otherwise it is 0. */
+ char final[16]; /* String to emit or expect after last char in
+ * conversion. */
+ char prefixBytes[256]; /* If a byte in the input stream is the first
+ * character of one of the escape sequences in
+ * the following array, the corresponding
+ * entry in this array is 1, otherwise it is
+ * 0. */
int numSubTables; /* Length of following array. */
- EscapeSubTable subTables[1];/* Information about each EscapeSubTable
- * used by this encoding type. The actual
- * size will be as large as necessary to
- * hold all EscapeSubTables. */
+ EscapeSubTable subTables[1];/* Information about each EscapeSubTable used
+ * by this encoding type. The actual size
+ * will be as large as necessary to hold all
+ * EscapeSubTables. */
} EscapeEncodingData;
/*
@@ -135,49 +136,51 @@ typedef struct EscapeEncodingData {
#define ENCODING_ESCAPE 3
/*
- * A list of directories in which Tcl should look for *.enc files.
- * This list is shared by all threads. Access is governed by a
- * mutex lock.
+ * A list of directories in which Tcl should look for *.enc files. This list
+ * is shared by all threads. Access is governed by a mutex lock.
*/
-static TclInitProcessGlobalValueProc InitializeEncodingSearchPath;
-static ProcessGlobalValue encodingSearchPath =
- {0, 0, NULL, NULL, InitializeEncodingSearchPath, NULL, NULL};
+static TclInitProcessGlobalValueProc InitializeEncodingSearchPath;
+static ProcessGlobalValue encodingSearchPath = {
+ 0, 0, NULL, NULL, InitializeEncodingSearchPath, NULL, NULL
+};
/*
- * A map from encoding names to the directories in which their data
- * files have been seen. The string value of the map is shared by all
- * threads. Access to the shared string is governed by a mutex lock.
+ * A map from encoding names to the directories in which their data files have
+ * been seen. The string value of the map is shared by all threads. Access
+ * to the shared string is governed by a mutex lock.
*/
-static ProcessGlobalValue encodingFileMap =
- {0, 0, NULL, NULL, NULL, NULL, NULL};
+static ProcessGlobalValue encodingFileMap = {
+ 0, 0, NULL, NULL, NULL, NULL, NULL
+};
/*
- * A list of directories making up the "library path". Historically
- * this search path has served many uses, but the only one remaining
- * is a base for the encodingSearchPath above. If the application
- * does not explicitly set the encodingSearchPath, then it will be
- * initialized by appending /encoding to each directory in this
- * "libraryPath".
+ * A list of directories making up the "library path". Historically this
+ * search path has served many uses, but the only one remaining is a base for
+ * the encodingSearchPath above. If the application does not explicitly set
+ * the encodingSearchPath, then it will be initialized by appending /encoding
+ * to each directory in this "libraryPath".
*/
-static ProcessGlobalValue libraryPath =
- {0, 0, NULL, NULL, TclpInitLibraryPath, NULL, NULL};
+
+static ProcessGlobalValue libraryPath = {
+ 0, 0, NULL, NULL, TclpInitLibraryPath, NULL, NULL
+};
static int encodingsInitialized = 0;
/*
- * Hash table that keeps track of all loaded Encodings. Keys are
- * the string names that represent the encoding, values are (Encoding *).
+ * Hash table that keeps track of all loaded Encodings. Keys are the string
+ * names that represent the encoding, values are (Encoding *).
*/
-
+
static Tcl_HashTable encodingTable;
TCL_DECLARE_MUTEX(encodingMutex)
/*
- * The following are used to hold the default and current system encodings.
- * If NULL is passed to one of the conversion routines, the current setting
- * of the system encoding will be used to perform the conversion.
+ * The following are used to hold the default and current system encodings.
+ * If NULL is passed to one of the conversion routines, the current setting of
+ * the system encoding will be used to perform the conversion.
*/
static Tcl_Encoding defaultEncoding;
@@ -221,10 +224,10 @@ static Tcl_Encoding LoadEncodingFile _ANSI_ARGS_((Tcl_Interp *interp,
CONST char *name));
static Tcl_Encoding LoadTableEncoding _ANSI_ARGS_((CONST char *name,
int type, Tcl_Channel chan));
-static Tcl_Encoding LoadEscapeEncoding _ANSI_ARGS_((CONST char *name,
+static Tcl_Encoding LoadEscapeEncoding _ANSI_ARGS_((CONST char *name,
Tcl_Channel chan));
-static Tcl_Channel OpenEncodingFileChannel _ANSI_ARGS_((Tcl_Interp *interp,
- CONST char *name));
+static Tcl_Channel OpenEncodingFileChannel _ANSI_ARGS_((
+ Tcl_Interp *interp, CONST char *name));
static void TableFreeProc _ANSI_ARGS_((ClientData clientData));
static int TableFromUtfProc _ANSI_ARGS_((ClientData clientData,
CONST char *src, int srcLen, int flags,
@@ -264,35 +267,35 @@ static int UtfExtToUtfIntProc _ANSI_ARGS_((ClientData clientData,
int *dstCharsPtr));
/*
- * A Tcl_ObjType for holding a cached Tcl_Encoding as the intrep.
- * This should help the lifetime of encodings be more useful.
- * See concerns raised in [Bug 1077262].
+ * A Tcl_ObjType for holding a cached Tcl_Encoding as the intrep. This should
+ * help the lifetime of encodings be more useful. See concerns raised in [Bug
+ * 1077262].
*/
static Tcl_ObjType EncodingType = {
"encoding", FreeEncodingIntRep, DupEncodingIntRep, NULL, NULL
};
-
/*
*----------------------------------------------------------------------
*
* TclGetEncodingFromObj --
*
- * Writes to (*encodingPtr) the Tcl_Encoding value of (*objPtr),
- * if possible, and returns TCL_OK. If no such encoding exists,
- * TCL_ERROR is returned, and if interp is non-NULL, an error message
- * is written there.
+ * Writes to (*encodingPtr) the Tcl_Encoding value of (*objPtr), if
+ * possible, and returns TCL_OK. If no such encoding exists, TCL_ERROR
+ * is returned, and if interp is non-NULL, an error message is written
+ * there.
*
* Results:
- * Standard Tcl return code.
+ * Standard Tcl return code.
*
* Side effects:
* Caches the Tcl_Encoding value as the internal rep of (*objPtr).
*
*----------------------------------------------------------------------
*/
-int
+
+int
TclGetEncodingFromObj(interp, objPtr, encodingPtr)
Tcl_Interp *interp;
Tcl_Obj *objPtr;
@@ -318,10 +321,11 @@ TclGetEncodingFromObj(interp, objPtr, encodingPtr)
*
* FreeEncodingIntRep --
*
- * The Tcl_FreeInternalRepProc for the "encoding" Tcl_ObjType.
+ * The Tcl_FreeInternalRepProc for the "encoding" Tcl_ObjType.
*
*----------------------------------------------------------------------
*/
+
static void
FreeEncodingIntRep(objPtr)
Tcl_Obj *objPtr;
@@ -334,10 +338,11 @@ FreeEncodingIntRep(objPtr)
*
* DupEncodingIntRep --
*
- * The Tcl_DupInternalRepProc for the "encoding" Tcl_ObjType.
+ * The Tcl_DupInternalRepProc for the "encoding" Tcl_ObjType.
*
*----------------------------------------------------------------------
*/
+
static void
DupEncodingIntRep(srcPtr, dupPtr)
Tcl_Obj *srcPtr;
@@ -352,12 +357,11 @@ DupEncodingIntRep(srcPtr, dupPtr)
*
* TclGetEncodingSearchPath --
*
- * Keeps the per-thread copy of the encoding search path current
- * with changes to the global copy.
+ * Keeps the per-thread copy of the encoding search path current with
+ * changes to the global copy.
*
* Results:
- * Returns a "list" (Tcl_Obj *) that contains the encoding
- * search path.
+ * Returns a "list" (Tcl_Obj *) that contains the encoding search path.
*
*----------------------------------------------------------------------
*/
@@ -372,15 +376,15 @@ TclGetEncodingSearchPath() {
*
* TclSetEncodingSearchPath --
*
- * Keeps the per-thread copy of the encoding search path current
- * with changes to the global copy.
+ * Keeps the per-thread copy of the encoding search path current with
+ * changes to the global copy.
*
*----------------------------------------------------------------------
*/
-int
+int
TclSetEncodingSearchPath(searchPath)
- Tcl_Obj *searchPath;
+ Tcl_Obj *searchPath;
{
int dummy;
@@ -396,11 +400,11 @@ TclSetEncodingSearchPath(searchPath)
*
* TclGetLibraryPath --
*
- * Keeps the per-thread copy of the library path current
- * with changes to the global copy.
+ * Keeps the per-thread copy of the library path current with changes to
+ * the global copy.
*
* Results:
- * Returns a "list" (Tcl_Obj *) that contains the library path.
+ * Returns a "list" (Tcl_Obj *) that contains the library path.
*
*----------------------------------------------------------------------
*/
@@ -415,19 +419,19 @@ TclGetLibraryPath() {
*
* TclSetLibraryPath --
*
- * Keeps the per-thread copy of the library path current
- * with changes to the global copy.
+ * Keeps the per-thread copy of the library path current with changes to
+ * the global copy.
*
- * NOTE: this routine returns void, so there's no way to
- * report the error that searchPath is not a valid list.
- * In that case, this routine will silently do nothing.
+ * NOTE: this routine returns void, so there's no way to report the error
+ * that searchPath is not a valid list. In that case, this routine will
+ * silently do nothing.
*
*----------------------------------------------------------------------
*/
void
TclSetLibraryPath(path)
- Tcl_Obj *path;
+ Tcl_Obj *path;
{
int dummy;
@@ -442,18 +446,17 @@ TclSetLibraryPath(path)
*
* FillEncodingFileMap --
*
- * Called to bring the encoding file map in sync with the current
- * value of the encoding search path.
+ * Called to bring the encoding file map in sync with the current value
+ * of the encoding search path.
*
- * Scan the directories on the encoding search path, find the
- * *.enc files, and store the found pathnames in a map associated
- * with the encoding name.
+ * Scan the directories on the encoding search path, find the *.enc
+ * files, and store the found pathnames in a map associated with the
+ * encoding name.
*
- * In particular, if $dir is on the encoding search path, and the
- * file $dir/foo.enc is found, then store a "foo" -> $dir entry
- * in the map. Later, any need for the "foo" encoding will quickly
- * be able to construct the $dir/foo.enc pathname for reading the
- * encoding data.
+ * In particular, if $dir is on the encoding search path, and the file
+ * $dir/foo.enc is found, then store a "foo" -> $dir entry in the map.
+ * Later, any need for the "foo" encoding will quickly * be able to
+ * construct the $dir/foo.enc pathname for reading the encoding data.
*
* Results:
* None.
@@ -475,23 +478,25 @@ FillEncodingFileMap()
Tcl_ListObjLength(NULL, searchPath, &numDirs);
map = Tcl_NewDictObj();
Tcl_IncrRefCount(map);
+
for (i = numDirs-1; i >= 0; i--) {
- /*
- * Iterate backwards through the search path so as we
- * overwrite entries found, we favor files earlier on
- * the search path.
+ /*
+ * Iterate backwards through the search path so as we overwrite
+ * entries found, we favor files earlier on the search path.
*/
+
int j, numFiles;
Tcl_Obj *directory, *matchFileList = Tcl_NewObj();
Tcl_Obj **filev;
- Tcl_GlobTypeData readableFiles =
- {TCL_GLOB_TYPE_FILE, TCL_GLOB_PERM_R, NULL, NULL};
+ Tcl_GlobTypeData readableFiles = {
+ TCL_GLOB_TYPE_FILE, TCL_GLOB_PERM_R, NULL, NULL
+ };
Tcl_ListObjIndex(NULL, searchPath, i, &directory);
Tcl_IncrRefCount(directory);
Tcl_IncrRefCount(matchFileList);
- Tcl_FSMatchInDirectory(NULL, matchFileList,
- directory, "*.enc", &readableFiles);
+ Tcl_FSMatchInDirectory(NULL, matchFileList, directory, "*.enc",
+ &readableFiles);
Tcl_ListObjGetElements(NULL, matchFileList, &numFiles, &filev);
for (j=0; j<numFiles; j++) {
@@ -517,7 +522,7 @@ FillEncodingFileMap()
* TclInitEncodingSubsystem --
*
* Initialize all resources used by this subsystem on a per-process
- * basis.
+ * basis.
*
* Results:
* None.
@@ -540,11 +545,11 @@ TclInitEncodingSubsystem()
Tcl_MutexLock(&encodingMutex);
Tcl_InitHashTable(&encodingTable, TCL_STRING_KEYS);
Tcl_MutexUnlock(&encodingMutex);
-
+
/*
- * Create a few initial encodings. Note that the UTF-8 to UTF-8
- * translation is not a no-op, because it will turn a stream of
- * improperly formed UTF-8 into a properly formed stream.
+ * Create a few initial encodings. Note that the UTF-8 to UTF-8
+ * translation is not a no-op, because it will turn a stream of improperly
+ * formed UTF-8 into a properly formed stream.
*/
type.encodingName = "identity";
@@ -599,19 +604,22 @@ TclFinalizeEncodingSubsystem()
Tcl_HashEntry *hPtr;
Tcl_MutexLock(&encodingMutex);
- encodingsInitialized = 0;
+ encodingsInitialized = 0;
FreeEncoding(systemEncoding);
+
hPtr = Tcl_FirstHashEntry(&encodingTable, &search);
while (hPtr != NULL) {
/*
* Call FreeEncoding instead of doing it directly to handle refcounts
- * like escape encodings use. [Bug #524674]
- * Make sure to call Tcl_FirstHashEntry repeatedly so that all
- * encodings are eventually cleaned up.
+ * like escape encodings use. [Bug #524674] Make sure to call
+ * Tcl_FirstHashEntry repeatedly so that all encodings are eventually
+ * cleaned up.
*/
+
FreeEncoding((Tcl_Encoding) Tcl_GetHashValue(hPtr));
hPtr = Tcl_FirstHashEntry(&encodingTable, &search);
}
+
Tcl_DeleteHashTable(&encodingTable);
Tcl_MutexUnlock(&encodingMutex);
}
@@ -621,12 +629,12 @@ TclFinalizeEncodingSubsystem()
*
* Tcl_GetDefaultEncodingDir --
*
- * Legacy public interface to retrieve first directory in the
- * encoding searchPath.
+ * Legacy public interface to retrieve first directory in the encoding
+ * searchPath.
*
* Results:
- * The directory pathname, as a string, or NULL for an empty
- * encoding search path.
+ * The directory pathname, as a string, or NULL for an empty encoding
+ * search path.
*
* Side effects:
* None.
@@ -654,8 +662,8 @@ Tcl_GetDefaultEncodingDir()
*
* Tcl_SetDefaultEncodingDir --
*
- * Legacy public interface to set the first directory in the
- * encoding search path.
+ * Legacy public interface to set the first directory in the encoding
+ * search path.
*
* Results:
* None.
@@ -689,16 +697,16 @@ Tcl_SetDefaultEncodingDir(path)
*
* Results:
* Returns a token that represents the encoding. If the name didn't
- * refer to any known or loadable encoding, NULL is returned. If
- * NULL was returned, an error message is left in interp's result
- * object, unless interp was NULL.
+ * refer to any known or loadable encoding, NULL is returned. If NULL
+ * was returned, an error message is left in interp's result object,
+ * unless interp was NULL.
*
* Side effects:
* The new encoding type is entered into a table visible to all
- * interpreters, keyed off the encoding's name. For each call to
- * this procedure, there should eventually be a call to
- * Tcl_FreeEncoding, so that the database can be cleaned up when
- * encodings aren't needed anymore.
+ * interpreters, keyed off the encoding's name. For each call to this
+ * procedure, there should eventually be a call to Tcl_FreeEncoding, so
+ * that the database can be cleaned up when encodings aren't needed
+ * anymore.
*
*-------------------------------------------------------------------------
*/
@@ -727,6 +735,7 @@ Tcl_GetEncoding(interp, name)
return (Tcl_Encoding) encodingPtr;
}
Tcl_MutexUnlock(&encodingMutex);
+
return LoadEncodingFile(interp, name);
}
@@ -742,8 +751,8 @@ Tcl_GetEncoding(interp, name)
* None.
*
* Side effects:
- * The reference count associated with the encoding is decremented
- * and the encoding may be deleted if nothing is using it anymore.
+ * The reference count associated with the encoding is decremented and
+ * the encoding may be deleted if nothing is using it anymore.
*
*---------------------------------------------------------------------------
*/
@@ -762,15 +771,15 @@ Tcl_FreeEncoding(encoding)
*
* FreeEncoding --
*
- * This procedure is called to release an encoding by procedures
- * that already have the encodingMutex.
+ * This procedure is called to release an encoding by procedures that
+ * already have the encodingMutex.
*
* Results:
* None.
*
* Side effects:
- * The reference count associated with the encoding is decremented
- * and the encoding may be deleted if nothing is using it anymore.
+ * The reference count associated with the encoding is decremented and
+ * the encoding may be deleted if nothing is using it anymore.
*
*----------------------------------------------------------------------
*/
@@ -780,7 +789,7 @@ FreeEncoding(encoding)
Tcl_Encoding encoding;
{
Encoding *encodingPtr;
-
+
encodingPtr = (Encoding *) encoding;
if (encodingPtr == NULL) {
return;
@@ -803,8 +812,8 @@ FreeEncoding(encoding)
*
* Tcl_GetEncodingName --
*
- * Given an encoding, return the name that was used to constuct
- * the encoding.
+ * Given an encoding, return the name that was used to constuct the
+ * encoding.
*
* Results:
* The name of the encoding.
@@ -819,13 +828,11 @@ CONST char *
Tcl_GetEncodingName(encoding)
Tcl_Encoding encoding; /* The encoding whose name to fetch. */
{
- Encoding *encodingPtr;
-
if (encoding == NULL) {
encoding = systemEncoding;
}
- encodingPtr = (Encoding *) encoding;
- return encodingPtr->name;
+
+ return ((Encoding *) encoding)->name;
}
/*
@@ -833,8 +840,8 @@ Tcl_GetEncodingName(encoding)
*
* Tcl_GetEncodingNames --
*
- * Get the list of all known encodings, including the ones stored
- * as files on disk in the encoding path.
+ * Get the list of all known encodings, including the ones stored as
+ * files on disk in the encoding path.
*
* Results:
* Modifies interp's result object to hold a list of all the available
@@ -859,7 +866,10 @@ Tcl_GetEncodingNames(interp)
Tcl_InitObjHashTable(&table);
- /* Copy encoding names from loaded encoding table to table */
+ /*
+ * Copy encoding names from loaded encoding table to table.
+ */
+
Tcl_MutexLock(&encodingMutex);
for (hPtr = Tcl_FirstHashEntry(&encodingTable, &search); hPtr != NULL;
hPtr = Tcl_NextHashEntry(&search)) {
@@ -872,16 +882,22 @@ Tcl_GetEncodingNames(interp)
FillEncodingFileMap();
map = TclGetProcessGlobalValue(&encodingFileMap);
- /* Copy encoding names from encoding file map to table */
+ /*
+ * Copy encoding names from encoding file map to table.
+ */
+
Tcl_DictObjFirst(NULL, map, &mapSearch, &name, NULL, &done);
for (; !done; Tcl_DictObjNext(&mapSearch, &name, NULL, &done)) {
Tcl_CreateHashEntry(&table, (char *) name, &dummy);
}
- /* Pull all encoding names from table into the result list */
+ /*
+ * Pull all encoding names from table into the result list.
+ */
+
for (hPtr = Tcl_FirstHashEntry(&table, &search); hPtr != NULL;
hPtr = Tcl_NextHashEntry(&search)) {
- Tcl_ListObjAppendElement(NULL, result,
+ Tcl_ListObjAppendElement(NULL, result,
(Tcl_Obj *) Tcl_GetHashKey(&table, hPtr));
}
Tcl_SetObjResult(interp, result);
@@ -893,21 +909,21 @@ Tcl_GetEncodingNames(interp)
*
* Tcl_SetSystemEncoding --
*
- * Sets the default encoding that should be used whenever the user
- * passes a NULL value in to one of the conversion routines.
- * If the supplied name is NULL, the system encoding is reset to the
- * default system encoding.
+ * Sets the default encoding that should be used whenever the user passes
+ * a NULL value in to one of the conversion routines. If the supplied
+ * name is NULL, the system encoding is reset to the default system
+ * encoding.
*
* Results:
- * The return value is TCL_OK if the system encoding was successfully
- * set to the encoding specified by name, TCL_ERROR otherwise. If
- * TCL_ERROR is returned, an error message is left in interp's result
- * object, unless interp was NULL.
+ * The return value is TCL_OK if the system encoding was successfully set
+ * to the encoding specified by name, TCL_ERROR otherwise. If TCL_ERROR
+ * is returned, an error message is left in interp's result object,
+ * unless interp was NULL.
*
* Side effects:
- * The reference count of the new system encoding is incremented.
- * The reference count of the old system encoding is decremented and
- * it may be freed.
+ * The reference count of the new system encoding is incremented. The
+ * reference count of the old system encoding is decremented and it may
+ * be freed.
*
*------------------------------------------------------------------------
*/
@@ -948,25 +964,25 @@ Tcl_SetSystemEncoding(interp, name)
* Tcl_CreateEncoding --
*
* This procedure is called to define a new encoding and the procedures
- * that are used to convert between the specified encoding and Unicode.
+ * that are used to convert between the specified encoding and Unicode.
*
* Results:
- * Returns a token that represents the encoding. If an encoding with
- * the same name already existed, the old encoding token remains
- * valid and continues to behave as it used to, and will eventually
- * be garbage collected when the last reference to it goes away. Any
- * subsequent calls to Tcl_GetEncoding with the specified name will
- * retrieve the most recent encoding token.
+ * Returns a token that represents the encoding. If an encoding with the
+ * same name already existed, the old encoding token remains valid and
+ * continues to behave as it used to, and will eventually be garbage
+ * collected when the last reference to it goes away. Any subsequent
+ * calls to Tcl_GetEncoding with the specified name will retrieve the
+ * most recent encoding token.
*
* Side effects:
* The new encoding type is entered into a table visible to all
- * interpreters, keyed off the encoding's name. For each call to
- * this procedure, there should eventually be a call to
- * Tcl_FreeEncoding, so that the database can be cleaned up when
- * encodings aren't needed anymore.
+ * interpreters, keyed off the encoding's name. For each call to this
+ * procedure, there should eventually be a call to Tcl_FreeEncoding, so
+ * that the database can be cleaned up when encodings aren't needed
+ * anymore.
*
*---------------------------------------------------------------------------
- */
+ */
Tcl_Encoding
Tcl_CreateEncoding(typePtr)
@@ -981,16 +997,16 @@ Tcl_CreateEncoding(typePtr)
hPtr = Tcl_CreateHashEntry(&encodingTable, typePtr->encodingName, &new);
if (new == 0) {
/*
- * Remove old encoding from hash table, but don't delete it until
- * last reference goes away.
+ * Remove old encoding from hash table, but don't delete it until last
+ * reference goes away.
*/
-
+
encodingPtr = (Encoding *) Tcl_GetHashValue(hPtr);
encodingPtr->hPtr = NULL;
}
name = ckalloc((unsigned) strlen(typePtr->encodingName) + 1);
-
+
encodingPtr = (Encoding *) ckalloc(sizeof(Encoding));
encodingPtr->name = strcpy(name, typePtr->encodingName);
encodingPtr->toUtfProc = typePtr->toUtfProc;
@@ -1017,15 +1033,15 @@ Tcl_CreateEncoding(typePtr)
*
* Tcl_ExternalToUtfDString --
*
- * Convert a source buffer from the specified encoding into UTF-8.
- * If any of the bytes in the source buffer are invalid or cannot
- * be represented in the target encoding, a default fallback
- * character will be substituted.
+ * Convert a source buffer from the specified encoding into UTF-8. If any
+ * of the bytes in the source buffer are invalid or cannot be represented
+ * in the target encoding, a default fallback character will be
+ * substituted.
*
* Results:
* The converted bytes are stored in the DString, which is then NULL
- * terminated. The return value is a pointer to the value stored
- * in the DString.
+ * terminated. The return value is a pointer to the value stored in the
+ * DString.
*
* Side effects:
* None.
@@ -1033,15 +1049,15 @@ Tcl_CreateEncoding(typePtr)
*-------------------------------------------------------------------------
*/
-char *
+char *
Tcl_ExternalToUtfDString(encoding, src, srcLen, dstPtr)
- Tcl_Encoding encoding; /* The encoding for the source string, or
- * NULL for the default system encoding. */
+ Tcl_Encoding encoding; /* The encoding for the source string, or NULL
+ * for the default system encoding. */
CONST char *src; /* Source string in specified encoding. */
int srcLen; /* Source string length in bytes, or < 0 for
* encoding-specific string length. */
- Tcl_DString *dstPtr; /* Uninitialized or free DString in which
- * the converted string is stored. */
+ Tcl_DString *dstPtr; /* Uninitialized or free DString in which the
+ * converted string is stored. */
{
char *dst;
Tcl_EncodingState state;
@@ -1051,7 +1067,7 @@ Tcl_ExternalToUtfDString(encoding, src, srcLen, dstPtr)
Tcl_DStringInit(dstPtr);
dst = Tcl_DStringValue(dstPtr);
dstLen = dstPtr->spaceAvl - 1;
-
+
if (encoding == NULL) {
encoding = systemEncoding;
}
@@ -1062,16 +1078,20 @@ Tcl_ExternalToUtfDString(encoding, src, srcLen, dstPtr)
} else if (srcLen < 0) {
srcLen = (*encodingPtr->lengthProc)(src);
}
+
flags = TCL_ENCODING_START | TCL_ENCODING_END;
+
while (1) {
result = (*encodingPtr->toUtfProc)(encodingPtr->clientData, src,
srcLen, flags, &state, dst, dstLen, &srcRead, &dstWrote,
&dstChars);
soFar = dst + dstWrote - Tcl_DStringValue(dstPtr);
+
if (result != TCL_CONVERT_NOSPACE) {
Tcl_DStringSetLength(dstPtr, soFar);
return Tcl_DStringValue(dstPtr);
}
+
flags &= ~TCL_ENCODING_START;
src += srcRead;
srcLen -= srcRead;
@@ -1093,11 +1113,11 @@ Tcl_ExternalToUtfDString(encoding, src, srcLen, dstPtr)
*
* Results:
* The return value is one of TCL_OK, TCL_CONVERT_MULTIBYTE,
- * TCL_CONVERT_SYNTAX, TCL_CONVERT_UNKNOWN, or TCL_CONVERT_NOSPACE,
- * as documented in tcl.h.
+ * TCL_CONVERT_SYNTAX, TCL_CONVERT_UNKNOWN, or TCL_CONVERT_NOSPACE, as
+ * documented in tcl.h.
*
* Side effects:
- * The converted bytes are stored in the output buffer.
+ * The converted bytes are stored in the output buffer.
*
*-------------------------------------------------------------------------
*/
@@ -1106,19 +1126,19 @@ int
Tcl_ExternalToUtf(interp, encoding, src, srcLen, flags, statePtr, dst,
dstLen, srcReadPtr, dstWrotePtr, dstCharsPtr)
Tcl_Interp *interp; /* Interp for error return, if not NULL. */
- Tcl_Encoding encoding; /* The encoding for the source string, or
- * NULL for the default system encoding. */
+ Tcl_Encoding encoding; /* The encoding for the source string, or NULL
+ * for the default system encoding. */
CONST char *src; /* Source string in specified encoding. */
int srcLen; /* Source string length in bytes, or < 0 for
* encoding-specific string length. */
int flags; /* Conversion control flags. */
- Tcl_EncodingState *statePtr;/* Place for conversion routine to store
- * state information used during a piecewise
+ Tcl_EncodingState *statePtr;/* Place for conversion routine to store state
+ * information used during a piecewise
* conversion. Contents of statePtr are
* initialized and/or reset by conversion
* routine under control of flags argument. */
- char *dst; /* Output buffer in which converted string
- * is stored. */
+ char *dst; /* Output buffer in which converted string is
+ * stored. */
int dstLen; /* The maximum length of output buffer in
* bytes. */
int *srcReadPtr; /* Filled with the number of bytes from the
@@ -1136,7 +1156,7 @@ Tcl_ExternalToUtf(interp, encoding, src, srcLen, flags, statePtr, dst,
Encoding *encodingPtr;
int result, srcRead, dstWrote, dstChars;
Tcl_EncodingState state;
-
+
if (encoding == NULL) {
encoding = systemEncoding;
}
@@ -1163,8 +1183,8 @@ Tcl_ExternalToUtf(interp, encoding, src, srcLen, flags, statePtr, dst,
/*
* If there are any null characters in the middle of the buffer, they will
- * converted to the UTF-8 null character (\xC080). To get the actual
- * \0 at the end of the destination buffer, we need to append it manually.
+ * converted to the UTF-8 null character (\xC080). To get the actual \0 at
+ * the end of the destination buffer, we need to append it manually.
*/
dstLen--;
@@ -1172,6 +1192,7 @@ Tcl_ExternalToUtf(interp, encoding, src, srcLen, flags, statePtr, dst,
flags, statePtr, dst, dstLen, srcReadPtr, dstWrotePtr,
dstCharsPtr);
dst[*dstWrotePtr] = '\0';
+
return result;
}
@@ -1180,15 +1201,15 @@ Tcl_ExternalToUtf(interp, encoding, src, srcLen, flags, statePtr, dst,
*
* Tcl_UtfToExternalDString --
*
- * Convert a source buffer from UTF-8 into the specified encoding.
- * If any of the bytes in the source buffer are invalid or cannot
- * be represented in the target encoding, a default fallback
- * character will be substituted.
+ * Convert a source buffer from UTF-8 into the specified encoding. If
+ * any of the bytes in the source buffer are invalid or cannot be
+ * represented in the target encoding, a default fallback character will
+ * be substituted.
*
* Results:
- * The converted bytes are stored in the DString, which is then
- * NULL terminated in an encoding-specific manner. The return value
- * is a pointer to the value stored in the DString.
+ * The converted bytes are stored in the DString, which is then NULL
+ * terminated in an encoding-specific manner. The return value is a
+ * pointer to the value stored in the DString.
*
* Side effects:
* None.
@@ -1198,19 +1219,19 @@ Tcl_ExternalToUtf(interp, encoding, src, srcLen, flags, statePtr, dst,
char *
Tcl_UtfToExternalDString(encoding, src, srcLen, dstPtr)
- Tcl_Encoding encoding; /* The encoding for the converted string,
- * or NULL for the default system encoding. */
+ Tcl_Encoding encoding; /* The encoding for the converted string, or
+ * NULL for the default system encoding. */
CONST char *src; /* Source string in UTF-8. */
int srcLen; /* Source string length in bytes, or < 0 for
* strlen(). */
- Tcl_DString *dstPtr; /* Uninitialized or free DString in which
- * the converted string is stored. */
+ Tcl_DString *dstPtr; /* Uninitialized or free DString in which the
+ * converted string is stored. */
{
char *dst;
Tcl_EncodingState state;
Encoding *encodingPtr;
int flags, dstLen, result, soFar, srcRead, dstWrote, dstChars;
-
+
Tcl_DStringInit(dstPtr);
dst = Tcl_DStringValue(dstPtr);
dstLen = dstPtr->spaceAvl - 1;
@@ -1231,13 +1252,15 @@ Tcl_UtfToExternalDString(encoding, src, srcLen, dstPtr)
srcLen, flags, &state, dst, dstLen, &srcRead, &dstWrote,
&dstChars);
soFar = dst + dstWrote - Tcl_DStringValue(dstPtr);
+
if (result != TCL_CONVERT_NOSPACE) {
if (encodingPtr->nullSize == 2) {
- Tcl_DStringSetLength(dstPtr, soFar + 1);
+ Tcl_DStringSetLength(dstPtr, soFar + 1);
}
Tcl_DStringSetLength(dstPtr, soFar);
return Tcl_DStringValue(dstPtr);
}
+
flags &= ~TCL_ENCODING_START;
src += srcRead;
srcLen -= srcRead;
@@ -1259,11 +1282,11 @@ Tcl_UtfToExternalDString(encoding, src, srcLen, dstPtr)
*
* Results:
* The return value is one of TCL_OK, TCL_CONVERT_MULTIBYTE,
- * TCL_CONVERT_SYNTAX, TCL_CONVERT_UNKNOWN, or TCL_CONVERT_NOSPACE,
- * as documented in tcl.h.
+ * TCL_CONVERT_SYNTAX, TCL_CONVERT_UNKNOWN, or TCL_CONVERT_NOSPACE, as
+ * documented in tcl.h.
*
* Side effects:
- * The converted bytes are stored in the output buffer.
+ * The converted bytes are stored in the output buffer.
*
*-------------------------------------------------------------------------
*/
@@ -1272,14 +1295,14 @@ int
Tcl_UtfToExternal(interp, encoding, src, srcLen, flags, statePtr, dst,
dstLen, srcReadPtr, dstWrotePtr, dstCharsPtr)
Tcl_Interp *interp; /* Interp for error return, if not NULL. */
- Tcl_Encoding encoding; /* The encoding for the converted string,
- * or NULL for the default system encoding. */
+ Tcl_Encoding encoding; /* The encoding for the converted string, or
+ * NULL for the default system encoding. */
CONST char *src; /* Source string in UTF-8. */
int srcLen; /* Source string length in bytes, or < 0 for
* strlen(). */
int flags; /* Conversion control flags. */
- Tcl_EncodingState *statePtr;/* Place for conversion routine to store
- * state information used during a piecewise
+ Tcl_EncodingState *statePtr;/* Place for conversion routine to store state
+ * information used during a piecewise
* conversion. Contents of statePtr are
* initialized and/or reset by conversion
* routine under control of flags argument. */
@@ -1302,7 +1325,7 @@ Tcl_UtfToExternal(interp, encoding, src, srcLen, flags, statePtr, dst,
Encoding *encodingPtr;
int result, srcRead, dstWrote, dstChars;
Tcl_EncodingState state;
-
+
if (encoding == NULL) {
encoding = systemEncoding;
}
@@ -1335,7 +1358,7 @@ Tcl_UtfToExternal(interp, encoding, src, srcLen, flags, statePtr, dst,
dst[*dstWrotePtr + 1] = '\0';
}
dst[*dstWrotePtr] = '\0';
-
+
return result;
}
@@ -1351,8 +1374,8 @@ Tcl_UtfToExternal(interp, encoding, src, srcLen, flags, statePtr, dst,
* None.
*
* Side effects:
- * The absolute pathname for the application is computed and stored
- * to be returned later be [info nameofexecutable].
+ * The absolute pathname for the application is computed and stored to be
+ * returned later be [info nameofexecutable].
*
*---------------------------------------------------------------------------
*/
@@ -1375,14 +1398,14 @@ Tcl_FindExecutable(argv0)
* Open the file believed to hold data for the encoding, "name".
*
* Results:
- * Returns the readable Tcl_Channel from opening the file, or NULL
- * if the file could not be successfully opened. If NULL was
- * returned, an error message is left in interp's result object,
- * unless interp was NULL.
+ * Returns the readable Tcl_Channel from opening the file, or NULL if the
+ * file could not be successfully opened. If NULL was * returned, an
+ * error message is left in interp's result object, * unless interp was
+ * NULL.
*
* Side effects:
- * Channel may be opened. Information about the filesystem may be
- * cached to speed later calls.
+ * Channel may be opened. Information about the filesystem may be cached
+ * to speed later calls.
*
*---------------------------------------------------------------------------
*/
@@ -1390,8 +1413,8 @@ Tcl_FindExecutable(argv0)
static Tcl_Channel
OpenEncodingFileChannel(interp, name)
Tcl_Interp *interp; /* Interp for error reporting, if not NULL. */
- CONST char *name; /* The name of the encoding file on disk
- * and also the name for new encoding. */
+ CONST char *name; /* The name of the encoding file on disk and
+ * also the name for new encoding. */
{
Tcl_Obj *nameObj = Tcl_NewStringObj(name, -1);
Tcl_Obj *fileNameObj = Tcl_DuplicateObj(nameObj);
@@ -1407,7 +1430,10 @@ OpenEncodingFileChannel(interp, name)
Tcl_IncrRefCount(fileNameObj);
Tcl_DictObjGet(NULL, map, nameObj, &directory);
- /* Check that any cached directory is still on the encoding search path */
+ /*
+ * Check that any cached directory is still on the encoding search path.
+ */
+
if (NULL != directory) {
int verified = 0;
@@ -1425,7 +1451,10 @@ OpenEncodingFileChannel(interp, name)
}
}
if (!verified) {
- /* Directory no longer on the search path. Remove from cache */
+ /*
+ * Directory no longer on the search path. Remove from cache.
+ */
+
map = Tcl_DuplicateObj(map);
Tcl_DictObjRemove(NULL, map, nameObj);
TclSetProcessGlobalValue(&encodingFileMap, map, NULL);
@@ -1434,7 +1463,10 @@ OpenEncodingFileChannel(interp, name)
}
if (NULL != directory) {
- /* Got a directory from the cache. Try to use it first */
+ /*
+ * Got a directory from the cache. Try to use it first.
+ */
+
Tcl_IncrRefCount(directory);
path = Tcl_FSJoinToPath(directory, 1, &fileNameObj);
Tcl_IncrRefCount(path);
@@ -1443,25 +1475,33 @@ OpenEncodingFileChannel(interp, name)
Tcl_DecrRefCount(path);
}
- /* Scan the search path until we find it. */
+ /*
+ * Scan the search path until we find it.
+ */
+
for (i=0; i<numDirs && (chan == NULL); i++) {
path = Tcl_FSJoinToPath(dir[i], 1, &fileNameObj);
Tcl_IncrRefCount(path);
chan = Tcl_FSOpenFileChannel(NULL, path, "r", 0);
Tcl_DecrRefCount(path);
if (chan != NULL) {
- /* Save directory in the cache */
+ /*
+ * Save directory in the cache.
+ */
+
map = Tcl_DuplicateObj(TclGetProcessGlobalValue(&encodingFileMap));
Tcl_DictObjPut(NULL, map, nameObj, dir[i]);
TclSetProcessGlobalValue(&encodingFileMap, map, NULL);
}
}
+
if ((NULL == chan) && (interp != NULL)) {
Tcl_AppendResult(interp, "unknown encoding \"", name, "\"", NULL);
}
Tcl_DecrRefCount(fileNameObj);
Tcl_DecrRefCount(nameObj);
Tcl_DecrRefCount(searchPath);
+
return chan;
}
@@ -1470,17 +1510,17 @@ OpenEncodingFileChannel(interp, name)
*
* LoadEncodingFile --
*
- * Read a file that describes an encoding and create a new Encoding
- * from the data.
+ * Read a file that describes an encoding and create a new Encoding from
+ * the data.
*
* Results:
- * The return value is the newly loaded Encoding, or NULL if
- * the file didn't exist of was in the incorrect format. If NULL was
- * returned, an error message is left in interp's result object,
- * unless interp was NULL.
+ * The return value is the newly loaded Encoding, or NULL if the file
+ * didn't exist of was in the incorrect format. If NULL was returned, an
+ * error message is left in interp's result object, unless interp was
+ * NULL.
*
* Side effects:
- * File read from disk.
+ * File read from disk.
*
*---------------------------------------------------------------------------
*/
@@ -1488,8 +1528,8 @@ OpenEncodingFileChannel(interp, name)
static Tcl_Encoding
LoadEncodingFile(interp, name)
Tcl_Interp *interp; /* Interp for error reporting, if not NULL. */
- CONST char *name; /* The name of the encoding file on disk
- * and also the name for new encoding. */
+ CONST char *name; /* The name of the encoding file on disk and
+ * also the name for new encoding. */
{
Tcl_Channel chan = NULL;
Tcl_Encoding encoding = NULL;
@@ -1515,27 +1555,24 @@ LoadEncodingFile(interp, name)
}
switch (ch) {
- case 'S': {
- encoding = LoadTableEncoding(name, ENCODING_SINGLEBYTE, chan);
- break;
- }
- case 'D': {
- encoding = LoadTableEncoding(name, ENCODING_DOUBLEBYTE, chan);
- break;
- }
- case 'M': {
- encoding = LoadTableEncoding(name, ENCODING_MULTIBYTE, chan);
- break;
- }
- case 'E': {
- encoding = LoadEscapeEncoding(name, chan);
- break;
- }
+ case 'S':
+ encoding = LoadTableEncoding(name, ENCODING_SINGLEBYTE, chan);
+ break;
+ case 'D':
+ encoding = LoadTableEncoding(name, ENCODING_DOUBLEBYTE, chan);
+ break;
+ case 'M':
+ encoding = LoadTableEncoding(name, ENCODING_MULTIBYTE, chan);
+ break;
+ case 'E':
+ encoding = LoadEscapeEncoding(name, chan);
+ break;
}
if ((encoding == NULL) && (interp != NULL)) {
Tcl_AppendResult(interp, "invalid encoding file \"", name, "\"", NULL);
}
Tcl_Close(NULL, chan);
+
return encoding;
}
@@ -1544,17 +1581,17 @@ LoadEncodingFile(interp, name)
*
* LoadTableEncoding --
*
- * Helper function for LoadEncodingTable(). Loads a table to that
- * converts between Unicode and some other encoding and creates an
+ * Helper function for LoadEncodingTable(). Loads a table to that
+ * converts between Unicode and some other encoding and creates an
* encoding (using a TableEncoding structure) from that information.
*
- * File contains binary data, but begins with a marker to indicate
- * byte-ordering, so that same binary file can be read on either
- * endian platforms.
+ * File contains binary data, but begins with a marker to indicate
+ * byte-ordering, so that same binary file can be read on either endian
+ * platforms.
*
* Results:
- * The return value is the new encoding, or NULL if the encoding
- * could not be created (because the file contained invalid data).
+ * The return value is the new encoding, or NULL if the encoding could
+ * not be created (because the file contained invalid data).
*
* Side effects:
* None.
@@ -1629,8 +1666,8 @@ LoadTableEncoding(name, type, chan)
/*
* Read the table that maps characters to Unicode. Performs a single
- * malloc to get the memory for the array and all the pages needed by
- * the array.
+ * malloc to get the memory for the array and all the pages needed by the
+ * array.
*/
size = 256 * sizeof(unsigned short *) + numPages * PAGESIZE;
@@ -1646,15 +1683,15 @@ LoadTableEncoding(name, type, chan)
Tcl_ReadChars(chan, objPtr, 3 + 16 * (16 * 4 + 1), 0);
p = Tcl_GetString(objPtr);
- hi = (staticHex[(unsigned int)p[0]] << 4) + staticHex[(unsigned int)p[1]];
+ hi = (staticHex[UCHAR(p[0])] << 4) + staticHex[UCHAR(p[1])];
dataPtr->toUnicode[hi] = pageMemPtr;
p += 2;
for (lo = 0; lo < 256; lo++) {
if ((lo & 0x0f) == 0) {
p++;
}
- ch = (staticHex[(unsigned int)p[0]] << 12) + (staticHex[(unsigned int)p[1]] << 8)
- + (staticHex[(unsigned int)p[2]] << 4) + staticHex[(unsigned int)p[3]];
+ ch = (staticHex[UCHAR(p[0])] << 12) + (staticHex[UCHAR(p[1])] << 8)
+ + (staticHex[UCHAR(p[2])] << 4) + staticHex[UCHAR(p[3])];
if (ch != 0) {
used[ch >> 8] = 1;
}
@@ -1664,7 +1701,7 @@ LoadTableEncoding(name, type, chan)
}
}
TclDecrRefCount(objPtr);
-
+
if (type == ENCODING_DOUBLEBYTE) {
memset(dataPtr->prefixBytes, 1, sizeof(dataPtr->prefixBytes));
} else {
@@ -1677,9 +1714,9 @@ LoadTableEncoding(name, type, chan)
/*
* Invert toUnicode array to produce the fromUnicode array. Performs a
- * single malloc to get the memory for the array and all the pages
- * needed by the array. While reading in the toUnicode array, we
- * remembered what pages that would be needed for the fromUnicode array.
+ * single malloc to get the memory for the array and all the pages needed
+ * by the array. While reading in the toUnicode array, we remembered what
+ * pages that would be needed for the fromUnicode array.
*/
if (symbol) {
@@ -1706,7 +1743,7 @@ LoadTableEncoding(name, type, chan)
ch = dataPtr->toUnicode[hi][lo];
if (ch != 0) {
unsigned short *page;
-
+
page = dataPtr->fromUnicode[ch >> 8];
if (page == NULL) {
page = pageMemPtr;
@@ -1734,16 +1771,15 @@ LoadTableEncoding(name, type, chan)
}
if (symbol) {
unsigned short *page;
-
+
/*
* Make a special symbol encoding that not only maps the symbol
* characters from their Unicode code points down into page 0, but
- * also ensure that the characters on page 0 map to themselves.
- * This is so that a symbol font can be used to display a simple
- * string like "abcd" and have alpha, beta, chi, delta show up,
- * rather than have "unknown" chars show up because strictly
- * speaking the symbol font doesn't have glyphs for those low ascii
- * chars.
+ * also ensure that the characters on page 0 map to themselves. This
+ * is so that a symbol font can be used to display a simple string
+ * like "abcd" and have alpha, beta, chi, delta show up, rather than
+ * have "unknown" chars show up because strictly speaking the symbol
+ * font doesn't have glyphs for those low ascii chars.
*/
page = dataPtr->fromUnicode[0];
@@ -1762,15 +1798,22 @@ LoadTableEncoding(name, type, chan)
dataPtr->fromUnicode[hi] = emptyPage;
}
}
+
/*
* For trailing 'R'everse encoding, see [Patch #689341]
*/
+
Tcl_DStringInit(&lineString);
do {
int len;
- /* skip leading empty lines */
+
+ /*
+ * Skip leading empty lines.
+ */
+
while ((len = Tcl_Gets(chan, &lineString)) == 0)
;
+
if (len < 0) {
break;
}
@@ -1779,16 +1822,17 @@ LoadTableEncoding(name, type, chan)
break;
}
for (Tcl_DStringSetLength(&lineString, 0);
- (len = Tcl_Gets(chan, &lineString)) >= 0;
- Tcl_DStringSetLength(&lineString, 0)) {
+ (len = Tcl_Gets(chan, &lineString)) >= 0;
+ Tcl_DStringSetLength(&lineString, 0)) {
unsigned char* p;
int to, from;
+
if (len < 5) {
continue;
}
p = (unsigned char*) Tcl_DStringValue(&lineString);
to = (staticHex[p[0]] << 12) + (staticHex[p[1]] << 8)
- + (staticHex[p[2]] << 4) + staticHex[p[3]];
+ + (staticHex[p[2]] << 4) + staticHex[p[3]];
if (to == 0) {
continue;
}
@@ -1810,6 +1854,7 @@ LoadTableEncoding(name, type, chan)
encType.freeProc = TableFreeProc;
encType.nullSize = (type == ENCODING_DOUBLEBYTE) ? 2 : 1;
encType.clientData = (ClientData) dataPtr;
+
return Tcl_CreateEncoding(&encType);
}
@@ -1818,16 +1863,16 @@ LoadTableEncoding(name, type, chan)
*
* LoadEscapeEncoding --
*
- * Helper function for LoadEncodingTable(). Loads a state machine
- * that converts between Unicode and some other encoding.
+ * Helper function for LoadEncodingTable(). Loads a state machine that
+ * converts between Unicode and some other encoding.
*
- * File contains text data that describes the escape sequences that
- * are used to choose an encoding and the associated names for the
+ * File contains text data that describes the escape sequences that are
+ * used to choose an encoding and the associated names for the
* sub-encodings.
*
* Results:
- * The return value is the new encoding, or NULL if the encoding
- * could not be created (because the file contained invalid data).
+ * The return value is the new encoding, or NULL if the encoding could
+ * not be created (because the file contained invalid data).
*
* Side effects:
* None.
@@ -1856,13 +1901,13 @@ LoadEscapeEncoding(name, chan)
CONST char **argv;
char *line;
Tcl_DString lineString;
-
+
Tcl_DStringInit(&lineString);
if (Tcl_Gets(chan, &lineString) < 0) {
break;
}
line = Tcl_DStringValue(&lineString);
- if (Tcl_SplitList(NULL, line, &argc, &argv) != TCL_OK) {
+ if (Tcl_SplitList(NULL, line, &argc, &argv) != TCL_OK) {
continue;
}
if (argc >= 2) {
@@ -1884,7 +1929,10 @@ LoadEscapeEncoding(name, chan)
strncpy(est.name, argv[0], sizeof(est.name));
est.name[sizeof(est.name) - 1] = '\0';
- /* To avoid infinite recursion in [encoding system iso2022-*]*/
+ /*
+ * To avoid infinite recursion in [encoding system iso2022-*]
+ */
+
Tcl_GetEncoding(NULL, est.name);
est.encodingPtr = NULL;
@@ -1895,14 +1943,15 @@ LoadEscapeEncoding(name, chan)
Tcl_DStringFree(&lineString);
}
- size = sizeof(EscapeEncodingData)
- - sizeof(EscapeSubTable) + Tcl_DStringLength(&escapeData);
+ size = sizeof(EscapeEncodingData) - sizeof(EscapeSubTable)
+ + Tcl_DStringLength(&escapeData);
dataPtr = (EscapeEncodingData *) ckalloc(size);
dataPtr->initLen = strlen(init);
strcpy(dataPtr->init, init);
dataPtr->finalLen = strlen(final);
strcpy(dataPtr->final, final);
- dataPtr->numSubTables = Tcl_DStringLength(&escapeData) / sizeof(EscapeSubTable);
+ dataPtr->numSubTables =
+ Tcl_DStringLength(&escapeData) / sizeof(EscapeSubTable);
memcpy((VOID *) dataPtr->subTables, (VOID *) Tcl_DStringValue(&escapeData),
(size_t) Tcl_DStringLength(&escapeData));
Tcl_DStringFree(&escapeData);
@@ -1933,9 +1982,9 @@ LoadEscapeEncoding(name, chan)
*
* BinaryProc --
*
- * The default conversion when no other conversion is specified.
- * No translation is done; source bytes are copied directly to
- * destination bytes.
+ * The default conversion when no other conversion is specified. No
+ * translation is done; source bytes are copied directly to destination
+ * bytes.
*
* Results:
* Returns TCL_OK if conversion was successful.
@@ -1953,13 +2002,13 @@ BinaryProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
CONST char *src; /* Source string (unknown encoding). */
int srcLen; /* Source string length in bytes. */
int flags; /* Conversion control flags. */
- Tcl_EncodingState *statePtr;/* Place for conversion routine to store
- * state information used during a piecewise
+ Tcl_EncodingState *statePtr;/* Place for conversion routine to store state
+ * information used during a piecewise
* conversion. Contents of statePtr are
* initialized and/or reset by conversion
* routine under control of flags argument. */
- char *dst; /* Output buffer in which converted string
- * is stored. */
+ char *dst; /* Output buffer in which converted string is
+ * stored. */
int dstLen; /* The maximum length of output buffer in
* bytes. */
int *srcReadPtr; /* Filled with the number of bytes from the
@@ -1992,14 +2041,13 @@ BinaryProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
return result;
}
-
/*
*-------------------------------------------------------------------------
*
* UtfExtToUtfIntProc --
*
- * Convert from UTF-8 to UTF-8. While converting null-bytes from
- * the Tcl's internal representation (0xc0, 0x80) to the official
+ * Convert from UTF-8 to UTF-8. While converting null-bytes from the
+ * Tcl's internal representation (0xc0, 0x80) to the official
* representation (0x00). See UtfToUtfProc for details.
*
* Results:
@@ -2010,15 +2058,16 @@ BinaryProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
*
*-------------------------------------------------------------------------
*/
-static int
+
+static int
UtfIntToUtfExtProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
- srcReadPtr, dstWrotePtr, dstCharsPtr)
+ srcReadPtr, dstWrotePtr, dstCharsPtr)
ClientData clientData; /* Not used. */
CONST char *src; /* Source string in UTF-8. */
int srcLen; /* Source string length in bytes. */
int flags; /* Conversion control flags. */
- Tcl_EncodingState *statePtr;/* Place for conversion routine to store
- * state information used during a piecewise
+ Tcl_EncodingState *statePtr;/* Place for conversion routine to store state
+ * information used during a piecewise
* conversion. Contents of statePtr are
* initialized and/or reset by conversion
* routine under control of flags argument. */
@@ -2039,7 +2088,7 @@ UtfIntToUtfExtProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
* output buffer. */
{
return UtfToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
- srcReadPtr, dstWrotePtr, dstCharsPtr, 1);
+ srcReadPtr, dstWrotePtr, dstCharsPtr, 1);
}
/*
@@ -2059,20 +2108,20 @@ UtfIntToUtfExtProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
*
*-------------------------------------------------------------------------
*/
-static int
+static int
UtfExtToUtfIntProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
- srcReadPtr, dstWrotePtr, dstCharsPtr)
+ srcReadPtr, dstWrotePtr, dstCharsPtr)
ClientData clientData; /* Not used. */
CONST char *src; /* Source string in UTF-8. */
int srcLen; /* Source string length in bytes. */
int flags; /* Conversion control flags. */
- Tcl_EncodingState *statePtr;/* Place for conversion routine to store
- * state information used during a piecewise
+ Tcl_EncodingState *statePtr;/* Place for conversion routine to store state
+ * information used during a piecewise
* conversion. Contents of statePtr are
* initialized and/or reset by conversion
* routine under control of flags argument. */
- char *dst; /* Output buffer in which converted string
- * is stored. */
+ char *dst; /* Output buffer in which converted string is
+ * stored. */
int dstLen; /* The maximum length of output buffer in
* bytes. */
int *srcReadPtr; /* Filled with the number of bytes from the
@@ -2088,7 +2137,7 @@ UtfExtToUtfIntProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
* output buffer. */
{
return UtfToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
- srcReadPtr, dstWrotePtr, dstCharsPtr, 0);
+ srcReadPtr, dstWrotePtr, dstCharsPtr, 0);
}
/*
@@ -2096,9 +2145,9 @@ UtfExtToUtfIntProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
*
* UtfToUtfProc --
*
- * Convert from UTF-8 to UTF-8. Note that the UTF-8 to UTF-8
- * translation is not a no-op, because it will turn a stream of
- * improperly formed UTF-8 into a properly formed stream.
+ * Convert from UTF-8 to UTF-8. Note that the UTF-8 to UTF-8 translation
+ * is not a no-op, because it will turn a stream of improperly formed
+ * UTF-8 into a properly formed stream.
*
* Results:
* Returns TCL_OK if conversion was successful.
@@ -2109,37 +2158,36 @@ UtfExtToUtfIntProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
*-------------------------------------------------------------------------
*/
-static int
+static int
UtfToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
- srcReadPtr, dstWrotePtr, dstCharsPtr, pureNullMode)
+ srcReadPtr, dstWrotePtr, dstCharsPtr, pureNullMode)
ClientData clientData; /* Not used. */
CONST char *src; /* Source string in UTF-8. */
int srcLen; /* Source string length in bytes. */
int flags; /* Conversion control flags. */
- Tcl_EncodingState *statePtr;/* Place for conversion routine to store
- * state information used during a piecewise
+ Tcl_EncodingState *statePtr;/* Place for conversion routine to store state
+ * information used during a piecewise
* conversion. Contents of statePtr are
* initialized and/or reset by conversion
* routine under control of flags argument. */
- char *dst; /* Output buffer in which converted string
- * is stored. */
+ char *dst; /* Output buffer in which converted string is
+ * stored. */
int dstLen; /* The maximum length of output buffer in
* bytes. */
int *srcReadPtr; /* Filled with the number of bytes from the
- * source string that were converted. This
- * may be less than the original source length
- * if there was a problem converting some
- * source characters. */
+ * source string that were converted. This may
+ * be less than the original source length if
+ * there was a problem converting some source
+ * characters. */
int *dstWrotePtr; /* Filled with the number of bytes that were
* stored in the output buffer as a result of
* the conversion. */
int *dstCharsPtr; /* Filled with the number of characters that
* correspond to the bytes stored in the
* output buffer. */
- int pureNullMode; /* Convert embedded nulls from
- * internal representation to real
- * null-bytes or vice versa */
-
+ int pureNullMode; /* Convert embedded nulls from internal
+ * representation to real null-bytes or vice
+ * versa. */
{
CONST char *srcStart, *srcEnd, *srcClose;
char *dstStart, *dstEnd;
@@ -2147,7 +2195,7 @@ UtfToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
Tcl_UniChar ch;
result = TCL_OK;
-
+
srcStart = src;
srcEnd = src + srcLen;
srcClose = srcEnd;
@@ -2172,19 +2220,19 @@ UtfToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
result = TCL_CONVERT_NOSPACE;
break;
}
- if (UCHAR(*src) < 0x80 &&
- !(UCHAR(*src) == 0 && pureNullMode == 0)) {
+ if (UCHAR(*src) < 0x80 && !(UCHAR(*src) == 0 && pureNullMode == 0)) {
/*
- * Copy 7bit chatacters, but skip null-bytes when we are
- * in input mode, so that they get converted to 0xc080.
+ * Copy 7bit chatacters, but skip null-bytes when we are in input
+ * mode, so that they get converted to 0xc080.
*/
+
*dst++ = *src++;
- } else if (pureNullMode == 1 &&
- UCHAR(*src) == 0xc0 &&
- UCHAR(*(src+1)) == 0x80) {
- /*
+ } else if (pureNullMode == 1 && UCHAR(*src) == 0xc0 &&
+ UCHAR(*(src+1)) == 0x80) {
+ /*
* Convert 0xc080 to real nulls when we are in output mode.
*/
+
*dst++ = 0;
src += 2;
} else {
@@ -2215,20 +2263,20 @@ UtfToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
*-------------------------------------------------------------------------
*/
-static int
+static int
UnicodeToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
srcReadPtr, dstWrotePtr, dstCharsPtr)
ClientData clientData; /* Not used. */
CONST char *src; /* Source string in Unicode. */
int srcLen; /* Source string length in bytes. */
int flags; /* Conversion control flags. */
- Tcl_EncodingState *statePtr;/* Place for conversion routine to store
- * state information used during a piecewise
+ Tcl_EncodingState *statePtr;/* Place for conversion routine to store state
+ * information used during a piecewise
* conversion. Contents of statePtr are
* initialized and/or reset by conversion
* routine under control of flags argument. */
- char *dst; /* Output buffer in which converted string
- * is stored. */
+ char *dst; /* Output buffer in which converted string is
+ * stored. */
int dstLen; /* The maximum length of output buffer in
* bytes. */
int *srcReadPtr; /* Filled with the number of bytes from the
@@ -2246,7 +2294,7 @@ UnicodeToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
CONST Tcl_UniChar *wSrc, *wSrcStart, *wSrcEnd;
char *dstEnd, *dstStart;
int result, numChars;
-
+
result = TCL_OK;
if ((srcLen % sizeof(Tcl_UniChar)) != 0) {
result = TCL_CONVERT_MULTIBYTE;
@@ -2267,9 +2315,11 @@ UnicodeToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
result = TCL_CONVERT_NOSPACE;
break;
}
+
/*
* Special case for 1-byte utf chars for speed.
*/
+
if (*wSrc && *wSrc < 0x80) {
*dst++ = (char) *wSrc;
} else {
@@ -2300,20 +2350,21 @@ UnicodeToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
*-------------------------------------------------------------------------
*/
-static int
+static int
UtfToUnicodeProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
srcReadPtr, dstWrotePtr, dstCharsPtr)
- ClientData clientData; /* TableEncodingData that specifies encoding. */
+ ClientData clientData; /* TableEncodingData that specifies
+ * encoding. */
CONST char *src; /* Source string in UTF-8. */
int srcLen; /* Source string length in bytes. */
int flags; /* Conversion control flags. */
- Tcl_EncodingState *statePtr;/* Place for conversion routine to store
- * state information used during a piecewise
+ Tcl_EncodingState *statePtr;/* Place for conversion routine to store state
+ * information used during a piecewise
* conversion. Contents of statePtr are
* initialized and/or reset by conversion
* routine under control of flags argument. */
- char *dst; /* Output buffer in which converted string
- * is stored. */
+ char *dst; /* Output buffer in which converted string is
+ * stored. */
int dstLen; /* The maximum length of output buffer in
* bytes. */
int *srcReadPtr; /* Filled with the number of bytes from the
@@ -2331,7 +2382,7 @@ UtfToUnicodeProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
CONST char *srcStart, *srcEnd, *srcClose;
Tcl_UniChar *wDst, *wDstStart, *wDstEnd;
int result, numChars;
-
+
srcStart = src;
srcEnd = src + srcLen;
srcClose = srcEnd;
@@ -2357,10 +2408,11 @@ UtfToUnicodeProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
if (wDst > wDstEnd) {
result = TCL_CONVERT_NOSPACE;
break;
- }
+ }
src += TclUtfToUniChar(src, wDst);
wDst++;
}
+
*srcReadPtr = src - srcStart;
*dstWrotePtr = (char *) wDst - (char *) wDstStart;
*dstCharsPtr = numChars;
@@ -2384,7 +2436,7 @@ UtfToUnicodeProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
*-------------------------------------------------------------------------
*/
-static int
+static int
TableToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
srcReadPtr, dstWrotePtr, dstCharsPtr)
ClientData clientData; /* TableEncodingData that specifies
@@ -2392,13 +2444,13 @@ TableToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
CONST char *src; /* Source string in specified encoding. */
int srcLen; /* Source string length in bytes. */
int flags; /* Conversion control flags. */
- Tcl_EncodingState *statePtr;/* Place for conversion routine to store
- * state information used during a piecewise
+ Tcl_EncodingState *statePtr;/* Place for conversion routine to store state
+ * information used during a piecewise
* conversion. Contents of statePtr are
* initialized and/or reset by conversion
* routine under control of flags argument. */
- char *dst; /* Output buffer in which converted string
- * is stored. */
+ char *dst; /* Output buffer in which converted string is
+ * stored. */
int dstLen; /* The maximum length of output buffer in
* bytes. */
int *srcReadPtr; /* Filled with the number of bytes from the
@@ -2420,7 +2472,7 @@ TableToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
unsigned short **toUnicode;
unsigned short *pageZero;
TableEncodingData *dataPtr;
-
+
srcStart = src;
srcEnd = src + srcLen;
@@ -2434,10 +2486,10 @@ TableToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
result = TCL_OK;
for (numChars = 0; src < srcEnd; numChars++) {
- if (dst > dstEnd) {
- result = TCL_CONVERT_NOSPACE;
- break;
- }
+ if (dst > dstEnd) {
+ result = TCL_CONVERT_NOSPACE;
+ break;
+ }
byte = *((unsigned char *) src);
if (prefixBytes[byte]) {
src++;
@@ -2468,8 +2520,9 @@ TableToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
} else {
dst += Tcl_UniCharToUtf(ch, dst);
}
- src++;
+ src++;
}
+
*srcReadPtr = src - srcStart;
*dstWrotePtr = dst - dstStart;
*dstCharsPtr = numChars;
@@ -2493,7 +2546,7 @@ TableToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
*-------------------------------------------------------------------------
*/
-static int
+static int
TableFromUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
srcReadPtr, dstWrotePtr, dstCharsPtr)
ClientData clientData; /* TableEncodingData that specifies
@@ -2501,13 +2554,13 @@ TableFromUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
CONST char *src; /* Source string in UTF-8. */
int srcLen; /* Source string length in bytes. */
int flags; /* Conversion control flags. */
- Tcl_EncodingState *statePtr;/* Place for conversion routine to store
- * state information used during a piecewise
+ Tcl_EncodingState *statePtr;/* Place for conversion routine to store state
+ * information used during a piecewise
* conversion. Contents of statePtr are
* initialized and/or reset by conversion
* routine under control of flags argument. */
- char *dst; /* Output buffer in which converted string
- * is stored. */
+ char *dst; /* Output buffer in which converted string is
+ * stored. */
int dstLen; /* The maximum length of output buffer in
* bytes. */
int *srcReadPtr; /* Filled with the number of bytes from the
@@ -2528,13 +2581,13 @@ TableFromUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
int result, len, word, numChars;
TableEncodingData *dataPtr;
unsigned short **fromUnicode;
-
- result = TCL_OK;
+
+ result = TCL_OK;
dataPtr = (TableEncodingData *) clientData;
prefixBytes = dataPtr->prefixBytes;
fromUnicode = dataPtr->fromUnicode;
-
+
srcStart = src;
srcEnd = src + srcLen;
srcClose = srcEnd;
@@ -2559,9 +2612,10 @@ TableFromUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
#if TCL_UTF_MAX > 3
/*
- * This prevents a crash condition. More evaluation is required
- * for full support of int Tcl_UniChar. [Bug 1004065]
+ * This prevents a crash condition. More evaluation is required for
+ * full support of int Tcl_UniChar. [Bug 1004065]
*/
+
if (ch & 0xffff0000) {
word = 0;
} else
@@ -2573,7 +2627,7 @@ TableFromUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
result = TCL_CONVERT_UNKNOWN;
break;
}
- word = dataPtr->fallback;
+ word = dataPtr->fallback;
}
if (prefixBytes[(word >> 8)] != 0) {
if (dst + 1 > dstEnd) {
@@ -2590,9 +2644,10 @@ TableFromUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
}
dst[0] = (char) word;
dst++;
- }
+ }
src += len;
}
+
*srcReadPtr = src - srcStart;
*dstWrotePtr = dst - dstStart;
*dstCharsPtr = numChars;
@@ -2604,8 +2659,8 @@ TableFromUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
*
* TableFreeProc --
*
- * This procedure is invoked when an encoding is deleted. It deletes
- * the memory used by the TableEncodingData.
+ * This procedure is invoked when an encoding is deleted. It deletes the
+ * memory used by the TableEncodingData.
*
* Results:
* None.
@@ -2650,7 +2705,7 @@ TableFreeProc(clientData)
*-------------------------------------------------------------------------
*/
-static int
+static int
EscapeToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
srcReadPtr, dstWrotePtr, dstCharsPtr)
ClientData clientData; /* EscapeEncodingData that specifies
@@ -2658,13 +2713,13 @@ EscapeToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
CONST char *src; /* Source string in specified encoding. */
int srcLen; /* Source string length in bytes. */
int flags; /* Conversion control flags. */
- Tcl_EncodingState *statePtr;/* Place for conversion routine to store
- * state information used during a piecewise
+ Tcl_EncodingState *statePtr;/* Place for conversion routine to store state
+ * information used during a piecewise
* conversion. Contents of statePtr are
* initialized and/or reset by conversion
* routine under control of flags argument. */
- char *dst; /* Output buffer in which converted string
- * is stored. */
+ char *dst; /* Output buffer in which converted string is
+ * stored. */
int dstLen; /* The maximum length of output buffer in
* bytes. */
int *srcReadPtr; /* Filled with the number of bytes from the
@@ -2710,54 +2765,56 @@ EscapeToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
for (numChars = 0; src < srcEnd; ) {
int byte, hi, lo, ch;
- if (dst > dstEnd) {
- result = TCL_CONVERT_NOSPACE;
- break;
- }
+ if (dst > dstEnd) {
+ result = TCL_CONVERT_NOSPACE;
+ break;
+ }
byte = *((unsigned char *) src);
if (prefixBytes[byte]) {
unsigned int left, len, longest;
int checked, i;
EscapeSubTable *subTablePtr;
-
+
/*
- * Saw the beginning of an escape sequence.
+ * Saw the beginning of an escape sequence.
*/
-
+
left = srcEnd - src;
len = dataPtr->initLen;
longest = len;
checked = 0;
+
if (len <= left) {
checked++;
- if ((len > 0) &&
- (memcmp(src, dataPtr->init, len) == 0)) {
+ if ((len > 0) && (memcmp(src, dataPtr->init, len) == 0)) {
/*
* If we see initialization string, skip it, even if we're
- * not at the beginning of the buffer.
+ * not at the beginning of the buffer.
*/
-
+
src += len;
continue;
}
}
+
len = dataPtr->finalLen;
if (len > longest) {
longest = len;
}
+
if (len <= left) {
checked++;
- if ((len > 0) &&
- (memcmp(src, dataPtr->final, len) == 0)) {
+ if ((len > 0) && (memcmp(src, dataPtr->final, len) == 0)) {
/*
* If we see finalization string, skip it, even if we're
- * not at the end of the buffer.
+ * not at the end of the buffer.
*/
-
+
src += len;
continue;
}
}
+
subTablePtr = dataPtr->subTables;
for (i = 0; i < dataPtr->numSubTables; i++) {
len = subTablePtr->sequenceLen;
@@ -2766,7 +2823,7 @@ EscapeToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
}
if (len <= left) {
checked++;
- if ((len > 0) &&
+ if ((len > 0) &&
(memcmp(src, subTablePtr->sequence, len) == 0)) {
state = i;
encodingPtr = NULL;
@@ -2777,6 +2834,7 @@ EscapeToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
}
subTablePtr++;
}
+
if (subTablePtr == NULL) {
/*
* A match was found, the escape sequence was consumed, and
@@ -2788,8 +2846,8 @@ EscapeToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
/*
* We have a split-up or unrecognized escape sequence. If we
- * checked all the sequences, then it's a syntax error,
- * otherwise we need more bytes to determine a match.
+ * checked all the sequences, then it's a syntax error, otherwise
+ * we need more bytes to determine a match.
*/
if ((checked == dataPtr->numSubTables + 2)
@@ -2817,6 +2875,7 @@ EscapeToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
tablePrefixBytes = tableDataPtr->prefixBytes;
tableToUnicode = tableDataPtr->toUnicode;
}
+
if (tablePrefixBytes[byte]) {
src++;
if (src >= srcEnd) {
@@ -2830,6 +2889,7 @@ EscapeToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
hi = 0;
lo = byte;
}
+
ch = tableToUnicode[hi][lo];
dst += Tcl_UniCharToUtf(ch, dst);
src++;
@@ -2860,7 +2920,7 @@ EscapeToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
*-------------------------------------------------------------------------
*/
-static int
+static int
EscapeFromUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
srcReadPtr, dstWrotePtr, dstCharsPtr)
ClientData clientData; /* EscapeEncodingData that specifies
@@ -2868,20 +2928,20 @@ EscapeFromUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
CONST char *src; /* Source string in UTF-8. */
int srcLen; /* Source string length in bytes. */
int flags; /* Conversion control flags. */
- Tcl_EncodingState *statePtr;/* Place for conversion routine to store
- * state information used during a piecewise
+ Tcl_EncodingState *statePtr;/* Place for conversion routine to store state
+ * information used during a piecewise
* conversion. Contents of statePtr are
* initialized and/or reset by conversion
* routine under control of flags argument. */
- char *dst; /* Output buffer in which converted string
- * is stored. */
+ char *dst; /* Output buffer in which converted string is
+ * stored. */
int dstLen; /* The maximum length of output buffer in
* bytes. */
int *srcReadPtr; /* Filled with the number of bytes from the
- * source string that were converted. This
- * may be less than the original source length
- * if there was a problem converting some
- * source characters. */
+ * source string that were converted. This may
+ * be less than the original source length if
+ * there was a problem converting some source
+ * characters. */
int *dstWrotePtr; /* Filled with the number of bytes that were
* stored in the output buffer as a result of
* the conversion. */
@@ -2897,8 +2957,8 @@ EscapeFromUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
TableEncodingData *tableDataPtr;
char *tablePrefixBytes;
unsigned short **tableFromUnicode;
-
- result = TCL_OK;
+
+ result = TCL_OK;
dataPtr = (EscapeEncodingData *) clientData;
@@ -2924,11 +2984,10 @@ EscapeFromUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
*dstWrotePtr = 0;
return TCL_CONVERT_NOSPACE;
}
- memcpy((VOID *) dst, (VOID *) dataPtr->init,
- (size_t) dataPtr->initLen);
+ memcpy((VOID *)dst, (VOID *)dataPtr->init, (size_t)dataPtr->initLen);
dst += dataPtr->initLen;
} else {
- state = (int) *statePtr;
+ state = (int) *statePtr;
}
encodingPtr = GetTableEncoding(dataPtr, state);
@@ -2940,7 +2999,7 @@ EscapeFromUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
unsigned int len;
int word;
Tcl_UniChar ch;
-
+
if ((src > srcClose) && (!Tcl_UtfCharComplete(src, srcEnd - src))) {
/*
* If there is more string to follow, this will ensure that the
@@ -2956,7 +3015,7 @@ EscapeFromUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
if ((word == 0) && (ch != 0)) {
int oldState;
EscapeSubTable *subTablePtr;
-
+
oldState = state;
for (state = 0; state < dataPtr->numSubTables; state++) {
encodingPtr = GetTableEncoding(dataPtr, state);
@@ -2976,16 +3035,17 @@ EscapeFromUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
encodingPtr = GetTableEncoding(dataPtr, state);
tableDataPtr = (TableEncodingData *) encodingPtr->clientData;
word = tableDataPtr->fallback;
- }
-
+ }
+
tablePrefixBytes = tableDataPtr->prefixBytes;
tableFromUnicode = tableDataPtr->fromUnicode;
/*
* The state variable has the value of oldState when word is 0.
- * In this case, the escape sequense should not be copied to dst
+ * In this case, the escape sequense should not be copied to dst
* because the current character set is not changed.
*/
+
if (state != oldState) {
subTablePtr = &dataPtr->subTables[state];
if ((dst + subTablePtr->sequenceLen) > dstEnd) {
@@ -2995,6 +3055,7 @@ EscapeFromUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
* variable because this escape sequence must be written
* in the next conversion.
*/
+
state = oldState;
result = TCL_CONVERT_NOSPACE;
break;
@@ -3020,7 +3081,7 @@ EscapeFromUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
}
dst[0] = (char) word;
dst++;
- }
+ }
src += len;
}
@@ -3052,7 +3113,7 @@ EscapeFromUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
*
* EscapeFreeProc --
*
- * This procedure is invoked when an EscapeEncodingData encoding is
+ * This procedure is invoked when an EscapeEncodingData encoding is
* deleted. It deletes the memory used by the encoding.
*
* Results:
@@ -3097,9 +3158,9 @@ EscapeFreeProc(clientData)
* The return value is the encoding.
*
* Side effects:
- * If the encoding that represents the specified state has not
- * already been used by this EscapeEncoding, it will be loaded
- * and cached in the dataPtr.
+ * If the encoding that represents the specified state has not already
+ * been used by this EscapeEncoding, it will be loaded and cached in the
+ * dataPtr.
*
*---------------------------------------------------------------------------
*/
@@ -3111,17 +3172,19 @@ GetTableEncoding(dataPtr, state)
{
EscapeSubTable *subTablePtr;
Encoding *encodingPtr;
-
+
subTablePtr = &dataPtr->subTables[state];
encodingPtr = subTablePtr->encodingPtr;
+
if (encodingPtr == NULL) {
encodingPtr = (Encoding *) Tcl_GetEncoding(NULL, subTablePtr->name);
- if ((encodingPtr == NULL)
+ if ((encodingPtr == NULL)
|| (encodingPtr->toUtfProc != TableToUtfProc)) {
Tcl_Panic("EscapeToUtfProc: invalid sub table");
}
subTablePtr->encodingPtr = encodingPtr;
}
+
return encodingPtr;
}
@@ -3130,9 +3193,9 @@ GetTableEncoding(dataPtr, state)
*
* unilen --
*
- * A helper function for the Tcl_ExternalToUtf functions. This
- * function is similar to strlen for double-byte characters: it
- * returns the number of bytes in a 0x0000 terminated string.
+ * A helper function for the Tcl_ExternalToUtf functions. This function
+ * is similar to strlen for double-byte characters: it returns the number
+ * of bytes in a 0x0000 terminated string.
*
* Results:
* As above.
@@ -3161,28 +3224,27 @@ unilen(src)
*
* InitializeEncodingSearchPath --
*
- * This is the fallback routine that sets the default value
- * of the encoding search path if the application has not set
- * one via a call to TclSetEncodingSearchPath() by the first
- * time the search path is needed to load encoding data.
+ * This is the fallback routine that sets the default value of the
+ * encoding search path if the application has not set one via a call to
+ * TclSetEncodingSearchPath() by the first time the search path is needed
+ * to load encoding data.
*
- * The default encoding search path is produced by taking each
- * directory in the library path, appending a subdirectory
- * named "encoding", and if the resulting directory exists,
- * adding it to the encoding search path.
+ * The default encoding search path is produced by taking each directory
+ * in the library path, appending a subdirectory named "encoding", and if
+ * the resulting directory exists, adding it to the encoding search path.
*
* Results:
* None.
*
* Side effects:
- * Sets the encoding search path to an initial value.
+ * Sets the encoding search path to an initial value.
*
*-------------------------------------------------------------------------
*/
void
InitializeEncodingSearchPath(valuePtr, lengthPtr, encodingPtr)
- char **valuePtr;
+ char **valuePtr;
int *lengthPtr;
Tcl_Encoding *encodingPtr;
{
@@ -3196,18 +3258,20 @@ InitializeEncodingSearchPath(valuePtr, lengthPtr, encodingPtr)
libPath = TclGetLibraryPath();
Tcl_IncrRefCount(libPath);
Tcl_ListObjLength(NULL, libPath, &numDirs);
+
for (i = 0; i < numDirs; i++) {
Tcl_Obj *directory, *path;
Tcl_StatBuf stat;
Tcl_ListObjIndex(NULL, libPath, i, &directory);
- path = Tcl_FSJoinToPath(directory, 1, &encodingObj);
+ path = Tcl_FSJoinToPath(directory, 1, &encodingObj);
Tcl_IncrRefCount(path);
if ((0 == Tcl_FSStat(path, &stat)) && S_ISDIR(stat.st_mode)) {
Tcl_ListObjAppendElement(NULL, searchPath, path);
}
Tcl_DecrRefCount(path);
}
+
Tcl_DecrRefCount(libPath);
Tcl_DecrRefCount(encodingObj);
*encodingPtr = libraryPath.encoding;
@@ -3215,8 +3279,17 @@ InitializeEncodingSearchPath(valuePtr, lengthPtr, encodingPtr)
((Encoding *)(*encodingPtr))->refCount++;
}
bytes = Tcl_GetStringFromObj(searchPath, &numBytes);
+
*lengthPtr = numBytes;
*valuePtr = ckalloc((unsigned int) numBytes + 1);
memcpy((VOID *) *valuePtr, (VOID *) bytes, (size_t) numBytes + 1);
Tcl_DecrRefCount(searchPath);
}
+
+/*
+ * Local Variables:
+ * mode: c
+ * c-basic-offset: 4
+ * fill-column: 78
+ * End:
+ */