summaryrefslogtreecommitdiffstats
path: root/generic/tclEncoding.c
diff options
context:
space:
mode:
Diffstat (limited to 'generic/tclEncoding.c')
-rw-r--r--generic/tclEncoding.c587
1 files changed, 354 insertions, 233 deletions
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c
index 88f4d3a..387a2a6 100644
--- a/generic/tclEncoding.c
+++ b/generic/tclEncoding.c
@@ -18,7 +18,7 @@ typedef size_t (LengthProc)(const char *src);
* convert between various character sets and UTF-8.
*/
-typedef struct Encoding {
+typedef struct {
char *name; /* Name of encoding. Malloced because (1) hash
* table entry that owns this encoding may be
* freed prior to this encoding being freed,
@@ -46,7 +46,7 @@ typedef struct Encoding {
* nullSize is 2, this is a function that
* returns the number of bytes in a 0x0000
* terminated string. */
- int refCount; /* Number of uses of this structure. */
+ size_t refCount; /* Number of uses of this structure. */
Tcl_HashEntry *hPtr; /* Hash table entry that owns this encoding. */
} Encoding;
@@ -57,7 +57,7 @@ typedef struct Encoding {
* encoding.
*/
-typedef struct TableEncodingData {
+typedef struct {
int fallback; /* Character (in this encoding) to substitute
* when this encoding cannot represent a UTF-8
* character. */
@@ -91,7 +91,7 @@ typedef struct TableEncodingData {
* for switching character sets.
*/
-typedef struct EscapeSubTable {
+typedef struct {
unsigned sequenceLen; /* Length of following string. */
char sequence[16]; /* Escape code that marks this encoding. */
char name[32]; /* Name for encoding. */
@@ -100,7 +100,7 @@ typedef struct EscapeSubTable {
* yet. */
} EscapeSubTable;
-typedef struct EscapeEncodingData {
+typedef struct {
int fallback; /* Character (in this encoding) to substitute
* when this encoding cannot represent a UTF-8
* character. */
@@ -234,12 +234,17 @@ static int TableToUtfProc(ClientData clientData, const char *src,
char *dst, int dstLen, int *srcReadPtr,
int *dstWrotePtr, int *dstCharsPtr);
static size_t unilen(const char *src);
-static int UnicodeToUtfProc(ClientData clientData,
+static int Utf16ToUtfProc(ClientData clientData,
const char *src, int srcLen, int flags,
Tcl_EncodingState *statePtr, char *dst, int dstLen,
int *srcReadPtr, int *dstWrotePtr,
int *dstCharsPtr);
-static int UtfToUnicodeProc(ClientData clientData,
+static int UtfToUtf16Proc(ClientData clientData,
+ const char *src, int srcLen, int flags,
+ Tcl_EncodingState *statePtr, char *dst, int dstLen,
+ int *srcReadPtr, int *dstWrotePtr,
+ int *dstCharsPtr);
+static int UtfToUcs2Proc(ClientData clientData,
const char *src, int srcLen, int flags,
Tcl_EncodingState *statePtr, char *dst, int dstLen,
int *srcReadPtr, int *dstWrotePtr,
@@ -279,6 +284,21 @@ static int Iso88591ToUtfProc(ClientData clientData,
static const Tcl_ObjType encodingType = {
"encoding", FreeEncodingIntRep, DupEncodingIntRep, NULL, NULL
};
+#define EncodingSetIntRep(objPtr, encoding) \
+ do { \
+ Tcl_ObjIntRep ir; \
+ ir.twoPtrValue.ptr1 = (encoding); \
+ ir.twoPtrValue.ptr2 = NULL; \
+ Tcl_StoreIntRep((objPtr), &encodingType, &ir); \
+ } while (0)
+
+#define EncodingGetIntRep(objPtr, encoding) \
+ do { \
+ const Tcl_ObjIntRep *irPtr; \
+ irPtr = TclFetchIntRep ((objPtr), &encodingType); \
+ (encoding) = irPtr ? irPtr->twoPtrValue.ptr1 : NULL; \
+ } while (0)
+
/*
*----------------------------------------------------------------------
@@ -305,17 +325,16 @@ Tcl_GetEncodingFromObj(
Tcl_Obj *objPtr,
Tcl_Encoding *encodingPtr)
{
- const char *name = Tcl_GetString(objPtr);
-
- if (objPtr->typePtr != &encodingType) {
- Tcl_Encoding encoding = Tcl_GetEncoding(interp, name);
+ Tcl_Encoding encoding;
+ const char *name = TclGetString(objPtr);
+ EncodingGetIntRep(objPtr, encoding);
+ if (encoding == NULL) {
+ encoding = Tcl_GetEncoding(interp, name);
if (encoding == NULL) {
return TCL_ERROR;
}
- TclFreeIntRep(objPtr);
- objPtr->internalRep.twoPtrValue.ptr1 = encoding;
- objPtr->typePtr = &encodingType;
+ EncodingSetIntRep(objPtr, encoding);
}
*encodingPtr = Tcl_GetEncoding(NULL, name);
return TCL_OK;
@@ -335,8 +354,10 @@ static void
FreeEncodingIntRep(
Tcl_Obj *objPtr)
{
- Tcl_FreeEncoding(objPtr->internalRep.twoPtrValue.ptr1);
- objPtr->typePtr = NULL;
+ Tcl_Encoding encoding;
+
+ EncodingGetIntRep(objPtr, encoding);
+ Tcl_FreeEncoding(encoding);
}
/*
@@ -354,7 +375,8 @@ DupEncodingIntRep(
Tcl_Obj *srcPtr,
Tcl_Obj *dupPtr)
{
- dupPtr->internalRep.twoPtrValue.ptr1 = Tcl_GetEncoding(NULL, srcPtr->bytes);
+ Tcl_Encoding encoding = Tcl_GetEncoding(NULL, TclGetString(srcPtr));
+ EncodingSetIntRep(dupPtr, encoding);
}
/*
@@ -547,11 +569,16 @@ TclInitEncodingSubsystem(void)
TableEncodingData *dataPtr;
unsigned size;
unsigned short i;
+ union {
+ char c;
+ short s;
+ } isLe;
if (encodingsInitialized) {
return;
}
+ isLe.s = 1;
Tcl_MutexLock(&encodingMutex);
Tcl_InitHashTable(&encodingTable, TCL_STRING_KEYS);
Tcl_MutexUnlock(&encodingMutex);
@@ -562,7 +589,7 @@ TclInitEncodingSubsystem(void)
* formed UTF-8 into a properly formed stream.
*/
- type.encodingName = "identity";
+ type.encodingName = NULL;
type.toUtfProc = BinaryProc;
type.fromUtfProc = BinaryProc;
type.freeProc = NULL;
@@ -578,14 +605,39 @@ TclInitEncodingSubsystem(void)
type.clientData = NULL;
Tcl_CreateEncoding(&type);
- type.encodingName = "unicode";
- type.toUtfProc = UnicodeToUtfProc;
- type.fromUtfProc = UtfToUnicodeProc;
+ type.toUtfProc = Utf16ToUtfProc;
+ type.fromUtfProc = UtfToUcs2Proc;
type.freeProc = NULL;
type.nullSize = 2;
- type.clientData = NULL;
+ type.encodingName = "ucs-2le";
+ type.clientData = INT2PTR(1);
+ Tcl_CreateEncoding(&type);
+ type.encodingName = "ucs-2be";
+ type.clientData = INT2PTR(0);
+ Tcl_CreateEncoding(&type);
+ type.encodingName = "ucs-2";
+ type.clientData = INT2PTR(isLe.c);
Tcl_CreateEncoding(&type);
+ type.toUtfProc = Utf16ToUtfProc;
+ type.fromUtfProc = UtfToUtf16Proc;
+ type.freeProc = NULL;
+ type.nullSize = 2;
+ type.encodingName = "utf-16le";
+ type.clientData = INT2PTR(1);;
+ Tcl_CreateEncoding(&type);
+ type.encodingName = "utf-16be";
+ type.clientData = INT2PTR(0);
+ Tcl_CreateEncoding(&type);
+ type.encodingName = "utf-16";
+ type.clientData = INT2PTR(isLe.c);;
+ Tcl_CreateEncoding(&type);
+
+#ifndef TCL_NO_DEPRECATED
+ type.encodingName = "unicode";
+ Tcl_CreateEncoding(&type);
+#endif
+
/*
* Need the iso8859-1 encoding in order to process binary data, so force
* it to always be embedded. Note that this encoding *must* be a proper
@@ -593,14 +645,14 @@ TclInitEncodingSubsystem(void)
* code to duplicate the structure of a table encoding here.
*/
- dataPtr = ckalloc(sizeof(TableEncodingData));
+ dataPtr = Tcl_Alloc(sizeof(TableEncodingData));
memset(dataPtr, 0, sizeof(TableEncodingData));
dataPtr->fallback = '?';
size = 256*(sizeof(unsigned short *) + sizeof(unsigned short));
- dataPtr->toUnicode = ckalloc(size);
+ dataPtr->toUnicode = Tcl_Alloc(size);
memset(dataPtr->toUnicode, 0, size);
- dataPtr->fromUnicode = ckalloc(size);
+ dataPtr->fromUnicode = Tcl_Alloc(size);
memset(dataPtr->fromUnicode, 0, size);
dataPtr->toUnicode[0] = (unsigned short *) (dataPtr->toUnicode + 256);
@@ -677,68 +729,6 @@ TclFinalizeEncodingSubsystem(void)
/*
*-------------------------------------------------------------------------
*
- * Tcl_GetDefaultEncodingDir --
- *
- * Legacy public interface to retrieve first directory in the encoding
- * searchPath.
- *
- * Results:
- * The directory pathname, as a string, or NULL for an empty encoding
- * search path.
- *
- * Side effects:
- * None.
- *
- *-------------------------------------------------------------------------
- */
-
-const char *
-Tcl_GetDefaultEncodingDir(void)
-{
- int numDirs;
- Tcl_Obj *first, *searchPath = Tcl_GetEncodingSearchPath();
-
- Tcl_ListObjLength(NULL, searchPath, &numDirs);
- if (numDirs == 0) {
- return NULL;
- }
- Tcl_ListObjIndex(NULL, searchPath, 0, &first);
-
- return Tcl_GetString(first);
-}
-
-/*
- *-------------------------------------------------------------------------
- *
- * Tcl_SetDefaultEncodingDir --
- *
- * Legacy public interface to set the first directory in the encoding
- * search path.
- *
- * Results:
- * None.
- *
- * Side effects:
- * Modifies the encoding search path.
- *
- *-------------------------------------------------------------------------
- */
-
-void
-Tcl_SetDefaultEncodingDir(
- const char *path)
-{
- Tcl_Obj *searchPath = Tcl_GetEncodingSearchPath();
- Tcl_Obj *directory = Tcl_NewStringObj(path, -1);
-
- searchPath = Tcl_DuplicateObj(searchPath);
- Tcl_ListObjReplace(NULL, searchPath, 0, 0, 1, &directory);
- Tcl_SetEncodingSearchPath(searchPath);
-}
-
-/*
- *-------------------------------------------------------------------------
- *
* Tcl_GetEncoding --
*
* Given the name of a encoding, find the corresponding Tcl_Encoding
@@ -843,19 +833,17 @@ FreeEncoding(
if (encodingPtr == NULL) {
return;
}
- if (encodingPtr->refCount<=0) {
- Tcl_Panic("FreeEncoding: refcount problem !!!");
- }
- encodingPtr->refCount--;
- if (encodingPtr->refCount == 0) {
+ if (encodingPtr->refCount-- <= 1) {
if (encodingPtr->freeProc != NULL) {
encodingPtr->freeProc(encodingPtr->clientData);
}
if (encodingPtr->hPtr != NULL) {
Tcl_DeleteHashEntry(encodingPtr->hPtr);
}
- ckfree(encodingPtr->name);
- ckfree(encodingPtr);
+ if (encodingPtr->name) {
+ Tcl_Free(encodingPtr->name);
+ }
+ Tcl_Free(encodingPtr);
}
}
@@ -976,7 +964,7 @@ Tcl_GetEncodingNames(
* Side effects:
* The reference count of the new system encoding is incremented. The
* reference count of the old system encoding is decremented and it may
- * be freed.
+ * be freed. All VFS cached information is invalidated.
*
*------------------------------------------------------------------------
*/
@@ -1007,6 +995,7 @@ Tcl_SetSystemEncoding(
FreeEncoding(systemEncoding);
systemEncoding = encoding;
Tcl_MutexUnlock(&encodingMutex);
+ Tcl_FSMountsChanged(NULL);
return TCL_OK;
}
@@ -1042,9 +1031,24 @@ Tcl_CreateEncoding(
const Tcl_EncodingType *typePtr)
/* The encoding type. */
{
+ Encoding *encodingPtr = Tcl_Alloc(sizeof(Encoding));
+ encodingPtr->name = NULL;
+ encodingPtr->toUtfProc = typePtr->toUtfProc;
+ encodingPtr->fromUtfProc = typePtr->fromUtfProc;
+ encodingPtr->freeProc = typePtr->freeProc;
+ encodingPtr->nullSize = typePtr->nullSize;
+ encodingPtr->clientData = typePtr->clientData;
+ if (typePtr->nullSize == 1) {
+ encodingPtr->lengthProc = (LengthProc *) strlen;
+ } else {
+ encodingPtr->lengthProc = (LengthProc *) unilen;
+ }
+ encodingPtr->refCount = 1;
+ encodingPtr->hPtr = NULL;
+
+ if (typePtr->encodingName) {
Tcl_HashEntry *hPtr;
int isNew;
- Encoding *encodingPtr;
char *name;
Tcl_MutexLock(&encodingMutex);
@@ -1055,30 +1059,17 @@ Tcl_CreateEncoding(
* reference goes away.
*/
- encodingPtr = Tcl_GetHashValue(hPtr);
- encodingPtr->hPtr = NULL;
+ Encoding *replaceMe = Tcl_GetHashValue(hPtr);
+ replaceMe->hPtr = NULL;
}
- name = ckalloc(strlen(typePtr->encodingName) + 1);
-
- encodingPtr = ckalloc(sizeof(Encoding));
+ name = Tcl_Alloc(strlen(typePtr->encodingName) + 1);
encodingPtr->name = strcpy(name, typePtr->encodingName);
- encodingPtr->toUtfProc = typePtr->toUtfProc;
- encodingPtr->fromUtfProc = typePtr->fromUtfProc;
- encodingPtr->freeProc = typePtr->freeProc;
- encodingPtr->nullSize = typePtr->nullSize;
- encodingPtr->clientData = typePtr->clientData;
- if (typePtr->nullSize == 1) {
- encodingPtr->lengthProc = (LengthProc *) strlen;
- } else {
- encodingPtr->lengthProc = (LengthProc *) unilen;
- }
- encodingPtr->refCount = 1;
encodingPtr->hPtr = hPtr;
Tcl_SetHashValue(hPtr, encodingPtr);
Tcl_MutexUnlock(&encodingMutex);
-
+ }
return (Tcl_Encoding) encodingPtr;
}
@@ -1108,7 +1099,7 @@ Tcl_ExternalToUtfDString(
Tcl_Encoding encoding, /* The encoding for the source string, or NULL
* for the default system encoding. */
const char *src, /* Source string in specified encoding. */
- int srcLen, /* Source string length in bytes, or < 0 for
+ size_t srcLen, /* Source string length in bytes, or -1 for
* encoding-specific string length. */
Tcl_DString *dstPtr) /* Uninitialized or free DString in which the
* converted string is stored. */
@@ -1116,7 +1107,8 @@ Tcl_ExternalToUtfDString(
char *dst;
Tcl_EncodingState state;
const Encoding *encodingPtr;
- int flags, dstLen, result, soFar, srcRead, dstWrote, dstChars;
+ int flags, result, soFar, srcRead, dstWrote, dstChars;
+ size_t dstLen;
Tcl_DStringInit(dstPtr);
dst = Tcl_DStringValue(dstPtr);
@@ -1129,7 +1121,7 @@ Tcl_ExternalToUtfDString(
if (src == NULL) {
srcLen = 0;
- } else if (srcLen < 0) {
+ } else if (srcLen == TCL_AUTO_LENGTH) {
srcLen = encodingPtr->lengthProc(src);
}
@@ -1181,8 +1173,8 @@ Tcl_ExternalToUtf(
Tcl_Encoding encoding, /* The encoding for the source string, or NULL
* for the default system encoding. */
const char *src, /* Source string in specified encoding. */
- int srcLen, /* Source string length in bytes, or < 0 for
- * encoding-specific string length. */
+ size_t srcLen, /* Source string length in bytes, or -1
+ * for encoding-specific string length. */
int flags, /* Conversion control flags. */
Tcl_EncodingState *statePtr,/* Place for conversion routine to store state
* information used during a piecewise
@@ -1191,7 +1183,7 @@ Tcl_ExternalToUtf(
* routine under control of flags argument. */
char *dst, /* Output buffer in which converted string is
* stored. */
- int dstLen, /* The maximum length of output buffer in
+ size_t dstLen, /* The maximum length of output buffer in
* bytes. */
int *srcReadPtr, /* Filled with the number of bytes from the
* source string that were converted. This may
@@ -1219,7 +1211,7 @@ Tcl_ExternalToUtf(
if (src == NULL) {
srcLen = 0;
- } else if (srcLen < 0) {
+ } else if (srcLen == TCL_AUTO_LENGTH) {
srcLen = encodingPtr->lengthProc(src);
}
if (statePtr == NULL) {
@@ -1259,7 +1251,7 @@ Tcl_ExternalToUtf(
if (*dstCharsPtr <= maxChars) {
break;
}
- dstLen = Tcl_UtfAtIndex(dst, maxChars) - 1 - dst + TCL_UTF_MAX;
+ dstLen = Tcl_UtfAtIndex(dst, maxChars) - dst + (TCL_UTF_MAX - 1);
flags = savedFlags;
*statePtr = savedState;
} while (1);
@@ -1298,7 +1290,7 @@ Tcl_UtfToExternalDString(
Tcl_Encoding encoding, /* The encoding for the converted string, or
* NULL for the default system encoding. */
const char *src, /* Source string in UTF-8. */
- int srcLen, /* Source string length in bytes, or < 0 for
+ size_t srcLen, /* Source string length in bytes, or -1 for
* strlen(). */
Tcl_DString *dstPtr) /* Uninitialized or free DString in which the
* converted string is stored. */
@@ -1306,7 +1298,8 @@ Tcl_UtfToExternalDString(
char *dst;
Tcl_EncodingState state;
const Encoding *encodingPtr;
- int flags, dstLen, result, soFar, srcRead, dstWrote, dstChars;
+ int flags, result, soFar, srcRead, dstWrote, dstChars;
+ size_t dstLen;
Tcl_DStringInit(dstPtr);
dst = Tcl_DStringValue(dstPtr);
@@ -1319,7 +1312,7 @@ Tcl_UtfToExternalDString(
if (src == NULL) {
srcLen = 0;
- } else if (srcLen < 0) {
+ } else if (srcLen == TCL_AUTO_LENGTH) {
srcLen = strlen(src);
}
flags = TCL_ENCODING_START | TCL_ENCODING_END;
@@ -1373,8 +1366,8 @@ Tcl_UtfToExternal(
Tcl_Encoding encoding, /* The encoding for the converted string, or
* NULL for the default system encoding. */
const char *src, /* Source string in UTF-8. */
- int srcLen, /* Source string length in bytes, or < 0 for
- * strlen(). */
+ size_t srcLen, /* Source string length in bytes, or -1
+ * for strlen(). */
int flags, /* Conversion control flags. */
Tcl_EncodingState *statePtr,/* Place for conversion routine to store state
* information used during a piecewise
@@ -1383,7 +1376,7 @@ Tcl_UtfToExternal(
* routine under control of flags argument. */
char *dst, /* Output buffer in which converted string
* is stored. */
- int dstLen, /* The maximum length of output buffer in
+ size_t dstLen, /* The maximum length of output buffer in
* bytes. */
int *srcReadPtr, /* Filled with the number of bytes from the
* source string that were converted. This may
@@ -1408,7 +1401,7 @@ Tcl_UtfToExternal(
if (src == NULL) {
srcLen = 0;
- } else if (srcLen < 0) {
+ } else if (srcLen == TCL_AUTO_LENGTH) {
srcLen = strlen(src);
}
if (statePtr == NULL) {
@@ -1440,10 +1433,10 @@ Tcl_UtfToExternal(
/*
*---------------------------------------------------------------------------
*
- * Tcl_InitSubsystems/Tcl_FindExecutable --
+ * Tcl_FindExecutable --
*
- * This function initializes everything needed for the Tcl library
- * to be able to operate.
+ * This function computes the absolute path name of the current
+ * application, given its argv[0] value.
*
* Results:
* None.
@@ -1454,32 +1447,16 @@ Tcl_UtfToExternal(
*
*---------------------------------------------------------------------------
*/
-MODULE_SCOPE const TclStubs tclStubs;
-
-static const struct {
- const TclStubs *stubs;
- const char version[12];
-} stubInfo = {
- &tclStubs, TCL_PATCH_LEVEL
-};
-
-const char *
-Tcl_InitSubsystems(TCL_NORETURN1 Tcl_PanicProc *panicProc)
-{
- Tcl_SetPanicProc(panicProc);
- TclInitSubsystems();
- return stubInfo.version;
-}
-
#undef Tcl_FindExecutable
-void
+const char *
Tcl_FindExecutable(
const char *argv0) /* The value of the application's argv[0]
* (native). */
{
- TclInitSubsystems();
+ const char *version = TclInitSubsystems();
TclpSetInitialEncodings();
TclpFindExecutable(argv0);
+ return version;
}
/*
@@ -1534,10 +1511,10 @@ OpenEncodingFileChannel(
}
}
if (!verified) {
- const char *dirString = Tcl_GetString(directory);
+ const char *dirString = TclGetString(directory);
for (i=0; i<numDirs && !verified; i++) {
- if (strcmp(dirString, Tcl_GetString(dir[i])) == 0) {
+ if (strcmp(dirString, TclGetString(dir[i])) == 0) {
verified = 1;
}
}
@@ -1736,7 +1713,9 @@ LoadTableEncoding(
};
Tcl_DStringInit(&lineString);
- Tcl_Gets(chan, &lineString);
+ if (Tcl_Gets(chan, &lineString) == TCL_IO_FAILURE) {
+ return NULL;
+ }
line = Tcl_DStringValue(&lineString);
fallback = (int) strtol(line, &line, 16);
@@ -1755,7 +1734,7 @@ LoadTableEncoding(
#undef PAGESIZE
#define PAGESIZE (256 * sizeof(unsigned short))
- dataPtr = ckalloc(sizeof(TableEncodingData));
+ dataPtr = Tcl_Alloc(sizeof(TableEncodingData));
memset(dataPtr, 0, sizeof(TableEncodingData));
dataPtr->fallback = fallback;
@@ -1767,7 +1746,7 @@ LoadTableEncoding(
*/
size = 256 * sizeof(unsigned short *) + numPages * PAGESIZE;
- dataPtr->toUnicode = ckalloc(size);
+ dataPtr->toUnicode = Tcl_Alloc(size);
memset(dataPtr->toUnicode, 0, size);
pageMemPtr = (unsigned short *) (dataPtr->toUnicode + 256);
@@ -1776,9 +1755,12 @@ LoadTableEncoding(
for (i = 0; i < numPages; i++) {
int ch;
const char *p;
+ size_t expected = 3 + 16 * (16 * 4 + 1);
- Tcl_ReadChars(chan, objPtr, 3 + 16 * (16 * 4 + 1), 0);
- p = Tcl_GetString(objPtr);
+ if (Tcl_ReadChars(chan, objPtr, expected, 0) != expected) {
+ return NULL;
+ }
+ p = TclGetString(objPtr);
hi = (staticHex[UCHAR(p[0])] << 4) + staticHex[UCHAR(p[1])];
dataPtr->toUnicode[hi] = pageMemPtr;
p += 2;
@@ -1825,7 +1807,7 @@ LoadTableEncoding(
}
}
size = 256 * sizeof(unsigned short *) + numPages * PAGESIZE;
- dataPtr->fromUnicode = ckalloc(size);
+ dataPtr->fromUnicode = Tcl_Alloc(size);
memset(dataPtr->fromUnicode, 0, size);
pageMemPtr = (unsigned short *) (dataPtr->fromUnicode + 256);
@@ -1921,7 +1903,7 @@ LoadTableEncoding(
*/
for (TclDStringClear(&lineString);
- (len = Tcl_Gets(chan, &lineString)) >= 0;
+ (len = Tcl_Gets(chan, &lineString)) != -1;
TclDStringClear(&lineString)) {
const unsigned char *p;
int to, from;
@@ -2015,7 +1997,7 @@ LoadEscapeEncoding(
Tcl_DString lineString;
Tcl_DStringInit(&lineString);
- if (Tcl_Gets(chan, &lineString) < 0) {
+ if (Tcl_Gets(chan, &lineString) == TCL_IO_FAILURE) {
break;
}
line = Tcl_DStringValue(&lineString);
@@ -2057,21 +2039,21 @@ LoadEscapeEncoding(
Tcl_DStringAppend(&escapeData, (char *) &est, sizeof(est));
}
}
- ckfree(argv);
+ Tcl_Free((void *)argv);
Tcl_DStringFree(&lineString);
}
size = sizeof(EscapeEncodingData) - sizeof(EscapeSubTable)
+ Tcl_DStringLength(&escapeData);
- dataPtr = ckalloc(size);
+ dataPtr = Tcl_Alloc(size);
dataPtr->initLen = strlen(init);
- memcpy(dataPtr->init, init, (unsigned) dataPtr->initLen + 1);
+ memcpy(dataPtr->init, init, dataPtr->initLen + 1);
dataPtr->finalLen = strlen(final);
- memcpy(dataPtr->final, final, (unsigned) dataPtr->finalLen + 1);
+ memcpy(dataPtr->final, final, dataPtr->finalLen + 1);
dataPtr->numSubTables =
Tcl_DStringLength(&escapeData) / sizeof(EscapeSubTable);
memcpy(dataPtr->subTables, Tcl_DStringValue(&escapeData),
- (size_t) Tcl_DStringLength(&escapeData));
+ Tcl_DStringLength(&escapeData));
Tcl_DStringFree(&escapeData);
memset(dataPtr->prefixBytes, 0, sizeof(dataPtr->prefixBytes));
@@ -2159,7 +2141,7 @@ BinaryProc(
*srcReadPtr = srcLen;
*dstWrotePtr = srcLen;
*dstCharsPtr = srcLen;
- memcpy(dst, src, (size_t) srcLen);
+ memcpy(dst, src, srcLen);
return result;
}
@@ -2312,8 +2294,11 @@ UtfToUtfProc(
const char *srcStart, *srcEnd, *srcClose;
const char *dstStart, *dstEnd;
int result, numChars, charLimit = INT_MAX;
- Tcl_UniChar ch;
+ Tcl_UniChar *chPtr = (Tcl_UniChar *) statePtr;
+ if (flags & TCL_ENCODING_START) {
+ *statePtr = 0;
+ }
result = TCL_OK;
srcStart = src;
@@ -2345,7 +2330,7 @@ UtfToUtfProc(
}
if (UCHAR(*src) < 0x80 && !(UCHAR(*src) == 0 && pureNullMode == 0)) {
/*
- * Copy 7bit chatacters, but skip null-bytes when we are in input
+ * Copy 7bit characters, but skip null-bytes when we are in input
* mode, so that they get converted to 0xc080.
*/
@@ -2360,17 +2345,24 @@ UtfToUtfProc(
src += 2;
} else if (!Tcl_UtfCharComplete(src, srcEnd - src)) {
/*
- * Always check before using Tcl_UtfToUniChar. Not doing can so
- * cause it run beyond the endof the buffer! If we happen such an
- * incomplete char its byts are made to represent themselves.
+ * Always check before using TclUtfToUniChar. Not doing can so
+ * cause it run beyond the end of the buffer! If we happen such an
+ * incomplete char its bytes are made to represent themselves.
*/
- ch = (unsigned char) *src;
+ *chPtr = (unsigned char) *src;
src += 1;
- dst += Tcl_UniCharToUtf(ch, dst);
+ dst += Tcl_UniCharToUtf(*chPtr, dst);
} else {
- src += Tcl_UtfToUniChar(src, &ch);
- dst += Tcl_UniCharToUtf(ch, dst);
+ int len = TclUtfToUniChar(src, chPtr);
+ src += len;
+ dst += Tcl_UniCharToUtf(*chPtr, dst);
+#if TCL_UTF_MAX <= 4
+ if ((*chPtr >= 0xD800) && (len < 3)) {
+ src += TclUtfToUniChar(src + len, chPtr);
+ dst += Tcl_UniCharToUtf(*chPtr, dst);
+ }
+#endif
}
}
@@ -2383,9 +2375,9 @@ UtfToUtfProc(
/*
*-------------------------------------------------------------------------
*
- * UnicodeToUtfProc --
+ * Utf16ToUtfProc --
*
- * Convert from Unicode to UTF-8.
+ * Convert from UTF-16 to UTF-8.
*
* Results:
* Returns TCL_OK if conversion was successful.
@@ -2397,8 +2389,8 @@ UtfToUtfProc(
*/
static int
-UnicodeToUtfProc(
- ClientData clientData, /* Not used. */
+Utf16ToUtfProc(
+ ClientData clientData, /* != NULL means LE, == NUL means BE */
const char *src, /* Source string in Unicode. */
int srcLen, /* Source string length in bytes. */
int flags, /* Conversion control flags. */
@@ -2426,16 +2418,16 @@ UnicodeToUtfProc(
const char *srcStart, *srcEnd;
const char *dstEnd, *dstStart;
int result, numChars, charLimit = INT_MAX;
- Tcl_UniChar ch;
+ unsigned short ch;
if (flags & TCL_ENCODING_CHAR_LIMIT) {
charLimit = *dstCharsPtr;
}
result = TCL_OK;
- if ((srcLen % sizeof(Tcl_UniChar)) != 0) {
+ if ((srcLen % sizeof(unsigned short)) != 0) {
result = TCL_CONVERT_MULTIBYTE;
- srcLen /= sizeof(Tcl_UniChar);
- srcLen *= sizeof(Tcl_UniChar);
+ srcLen /= sizeof(unsigned short);
+ srcLen *= sizeof(unsigned short);
}
srcStart = src;
@@ -2450,18 +2442,21 @@ UnicodeToUtfProc(
break;
}
+ if (clientData) {
+ ch = (src[1] & 0xFF) << 8 | (src[0] & 0xFF);
+ } else {
+ ch = (src[0] & 0xFF) << 8 | (src[1] & 0xFF);
+ }
/*
* Special case for 1-byte utf chars for speed. Make sure we work with
- * Tcl_UniChar-size data.
+ * unsigned short-size data.
*/
-
- ch = *(Tcl_UniChar *)src;
if (ch && ch < 0x80) {
*dst++ = (ch & 0xFF);
} else {
dst += Tcl_UniCharToUtf(ch, dst);
}
- src += sizeof(Tcl_UniChar);
+ src += sizeof(unsigned short);
}
*srcReadPtr = src - srcStart;
@@ -2473,9 +2468,9 @@ UnicodeToUtfProc(
/*
*-------------------------------------------------------------------------
*
- * UtfToUnicodeProc --
+ * UtfToUtf16Proc --
*
- * Convert from UTF-8 to Unicode.
+ * Convert from UTF-8 to UTF-16.
*
* Results:
* Returns TCL_OK if conversion was successful.
@@ -2487,9 +2482,8 @@ UnicodeToUtfProc(
*/
static int
-UtfToUnicodeProc(
- ClientData clientData, /* TableEncodingData that specifies
- * encoding. */
+UtfToUtf16Proc(
+ ClientData clientData, /* != NULL means LE, == NUL means BE */
const char *src, /* Source string in UTF-8. */
int srcLen, /* Source string length in bytes. */
int flags, /* Conversion control flags. */
@@ -2516,8 +2510,11 @@ UtfToUnicodeProc(
{
const char *srcStart, *srcEnd, *srcClose, *dstStart, *dstEnd;
int result, numChars;
- Tcl_UniChar ch;
+ Tcl_UniChar *chPtr = (Tcl_UniChar *) statePtr;
+ if (flags & TCL_ENCODING_START) {
+ *statePtr = 0;
+ }
srcStart = src;
srcEnd = src + srcLen;
srcClose = srcEnd;
@@ -2543,34 +2540,44 @@ UtfToUnicodeProc(
result = TCL_CONVERT_NOSPACE;
break;
}
- src += TclUtfToUniChar(src, &ch);
+ src += TclUtfToUniChar(src, chPtr);
/*
* Need to handle this in a way that won't cause misalignment by
* casting dst to a Tcl_UniChar. [Bug 1122671]
*/
-#ifdef WORDS_BIGENDIAN
+ if (clientData) {
#if TCL_UTF_MAX > 4
- *dst++ = (ch >> 24);
- *dst++ = ((ch >> 16) & 0xFF);
- *dst++ = ((ch >> 8) & 0xFF);
- *dst++ = (ch & 0xFF);
+ if (*chPtr <= 0xFFFF) {
+ *dst++ = (*chPtr & 0xFF);
+ *dst++ = (*chPtr >> 8);
+ } else {
+ *dst++ = (((*chPtr - 0x10000) >> 10) & 0xFF);
+ *dst++ = (((*chPtr - 0x10000) >> 18) & 0x3) | 0xD8;
+ *dst++ = (*chPtr & 0xFF);
+ *dst++ = ((*chPtr & 0x3) >> 8) | 0xDC;
+ }
#else
- *dst++ = (ch >> 8);
- *dst++ = (ch & 0xFF);
+ *dst++ = (*chPtr & 0xFF);
+ *dst++ = (*chPtr >> 8);
#endif
-#else
+ } else {
#if TCL_UTF_MAX > 4
- *dst++ = (ch & 0xFF);
- *dst++ = ((ch >> 8) & 0xFF);
- *dst++ = ((ch >> 16) & 0xFF);
- *dst++ = (ch >> 24);
+ if (*chPtr <= 0xFFFF) {
+ *dst++ = (*chPtr >> 8);
+ *dst++ = (*chPtr & 0xFF);
+ } else {
+ *dst++ = ((*chPtr & 0x3) >> 8) | 0xDC;
+ *dst++ = (*chPtr & 0xFF);
+ *dst++ = (((*chPtr - 0x10000) >> 18) & 0x3) | 0xD8;
+ *dst++ = (((*chPtr - 0x10000) >> 10) & 0xFF);
+ }
#else
- *dst++ = (ch & 0xFF);
- *dst++ = (ch >> 8);
-#endif
+ *dst++ = (*chPtr >> 8);
+ *dst++ = (*chPtr & 0xFF);
#endif
+ }
}
*srcReadPtr = src - srcStart;
*dstWrotePtr = dst - dstStart;
@@ -2581,6 +2588,113 @@ UtfToUnicodeProc(
/*
*-------------------------------------------------------------------------
*
+ * UtfToUcs2Proc --
+ *
+ * Convert from UTF-8 to UCS-2.
+ *
+ * Results:
+ * Returns TCL_OK if conversion was successful.
+ *
+ * Side effects:
+ * None.
+ *
+ *-------------------------------------------------------------------------
+ */
+
+static int
+UtfToUcs2Proc(
+ ClientData clientData, /* != NULL means LE, == NUL means BE */
+ const char *src, /* Source string in UTF-8. */
+ int srcLen, /* Source string length in bytes. */
+ int flags, /* Conversion control flags. */
+ Tcl_EncodingState *statePtr,/* Place for conversion routine to store state
+ * information used during a piecewise
+ * conversion. Contents of statePtr are
+ * initialized and/or reset by conversion
+ * routine under control of flags argument. */
+ char *dst, /* Output buffer in which converted string is
+ * stored. */
+ int dstLen, /* The maximum length of output buffer in
+ * bytes. */
+ int *srcReadPtr, /* Filled with the number of bytes from the
+ * source string that were converted. This may
+ * be less than the original source length if
+ * there was a problem converting some source
+ * characters. */
+ int *dstWrotePtr, /* Filled with the number of bytes that were
+ * stored in the output buffer as a result of
+ * the conversion. */
+ int *dstCharsPtr) /* Filled with the number of characters that
+ * correspond to the bytes stored in the
+ * output buffer. */
+{
+ const char *srcStart, *srcEnd, *srcClose, *dstStart, *dstEnd;
+ int result, numChars;
+#if TCL_UTF_MAX <= 4
+ int len;
+#endif
+ Tcl_UniChar ch = 0;
+
+ srcStart = src;
+ srcEnd = src + srcLen;
+ srcClose = srcEnd;
+ if ((flags & TCL_ENCODING_END) == 0) {
+ srcClose -= TCL_UTF_MAX;
+ }
+
+ dstStart = dst;
+ dstEnd = dst + dstLen - sizeof(Tcl_UniChar);
+
+ result = TCL_OK;
+ for (numChars = 0; src < srcEnd; numChars++) {
+ if ((src > srcClose) && (!Tcl_UtfCharComplete(src, srcEnd - src))) {
+ /*
+ * If there is more string to follow, this will ensure that the
+ * last UTF-8 character in the source buffer hasn't been cut off.
+ */
+
+ result = TCL_CONVERT_MULTIBYTE;
+ break;
+ }
+ if (dst > dstEnd) {
+ result = TCL_CONVERT_NOSPACE;
+ break;
+ }
+#if TCL_UTF_MAX <= 4
+ src += (len = TclUtfToUniChar(src, &ch));
+ if ((ch >= 0xD800) && (len < 3)) {
+ src += TclUtfToUniChar(src, &ch);
+ ch = 0xFFFD;
+ }
+#else
+ src += TclUtfToUniChar(src, &ch);
+ if (ch > 0xFFFF) {
+ ch = 0xFFFD;
+ }
+#endif
+
+ /*
+ * Need to handle this in a way that won't cause misalignment by
+ * casting dst to a Tcl_UniChar. [Bug 1122671]
+ */
+
+ if (clientData) {
+ *dst++ = (ch & 0xFF);
+ *dst++ = (ch >> 8);
+ } else {
+ *dst++ = (ch >> 8);
+ *dst++ = (ch & 0xFF);
+ }
+ }
+ *srcReadPtr = src - srcStart;
+ *dstWrotePtr = dst - dstStart;
+ *dstCharsPtr = numChars;
+ return result;
+}
+
+/*
+ *-------------------------------------------------------------------------
+ *
* TableToUtfProc --
*
* Convert from the encoding specified by the TableEncodingData into
@@ -2626,7 +2740,7 @@ TableToUtfProc(
const char *srcStart, *srcEnd;
const char *dstEnd, *dstStart, *prefixBytes;
int result, byte, numChars, charLimit = INT_MAX;
- Tcl_UniChar ch;
+ Tcl_UniChar ch = 0;
const unsigned short *const *toUnicode;
const unsigned short *pageZero;
TableEncodingData *dataPtr = clientData;
@@ -2738,7 +2852,7 @@ TableFromUtfProc(
{
const char *srcStart, *srcEnd, *srcClose;
const char *dstStart, *dstEnd, *prefixBytes;
- Tcl_UniChar ch;
+ Tcl_UniChar ch = 0;
int result, len, word, numChars;
TableEncodingData *dataPtr = clientData;
const unsigned short *const *fromUnicode;
@@ -2770,7 +2884,7 @@ TableFromUtfProc(
}
len = TclUtfToUniChar(src, &ch);
-#if TCL_UTF_MAX > 3
+#if TCL_UTF_MAX > 4
/*
* This prevents a crash condition. More evaluation is required for
* full support of int Tcl_UniChar. [Bug 1004065]
@@ -2779,6 +2893,10 @@ TableFromUtfProc(
if (ch & 0xffff0000) {
word = 0;
} else
+#else
+ if (!len) {
+ word = 0;
+ } else
#endif
word = fromUnicode[(ch >> 8)][ch & 0xff];
@@ -2872,7 +2990,7 @@ Iso88591ToUtfProc(
result = TCL_OK;
for (numChars = 0; src < srcEnd && numChars <= charLimit; numChars++) {
- Tcl_UniChar ch;
+ Tcl_UniChar ch = 0;
if (dst > dstEnd) {
result = TCL_CONVERT_NOSPACE;
@@ -2958,7 +3076,7 @@ Iso88591FromUtfProc(
dstEnd = dst + dstLen - 1;
for (numChars = 0; src < srcEnd; numChars++) {
- Tcl_UniChar ch;
+ Tcl_UniChar ch = 0;
int len;
if ((src > srcClose) && (!Tcl_UtfCharComplete(src, srcEnd - src))) {
@@ -2976,12 +3094,18 @@ Iso88591FromUtfProc(
* Check for illegal characters.
*/
- if (ch > 0xff) {
+ if (ch > 0xff
+#if TCL_UTF_MAX <= 4
+ || ((ch >= 0xD800) && (len < 3))
+#endif
+ ) {
if (flags & TCL_ENCODING_STOPONERROR) {
result = TCL_CONVERT_UNKNOWN;
break;
}
-
+#if TCL_UTF_MAX <= 4
+ if ((ch >= 0xD800) && (len < 3)) len = 4;
+#endif
/*
* Plunge on, using '?' as a fallback character.
*/
@@ -3031,11 +3155,11 @@ TableFreeProc(
* Make sure we aren't freeing twice on shutdown. [Bug 219314]
*/
- ckfree(dataPtr->toUnicode);
+ Tcl_Free(dataPtr->toUnicode);
dataPtr->toUnicode = NULL;
- ckfree(dataPtr->fromUnicode);
+ Tcl_Free(dataPtr->fromUnicode);
dataPtr->fromUnicode = NULL;
- ckfree(dataPtr);
+ Tcl_Free(dataPtr);
}
/*
@@ -3330,7 +3454,7 @@ EscapeFromUtfProc(
*dstWrotePtr = 0;
return TCL_CONVERT_NOSPACE;
}
- memcpy(dst, dataPtr->init, (size_t)dataPtr->initLen);
+ memcpy(dst, dataPtr->init, dataPtr->initLen);
dst += dataPtr->initLen;
} else {
state = PTR2INT(*statePtr);
@@ -3345,7 +3469,7 @@ EscapeFromUtfProc(
for (numChars = 0; src < srcEnd; numChars++) {
unsigned len;
int word;
- Tcl_UniChar ch;
+ Tcl_UniChar ch = 0;
if ((src > srcClose) && (!Tcl_UtfCharComplete(src, srcEnd - src))) {
/*
@@ -3390,7 +3514,7 @@ EscapeFromUtfProc(
/*
* The state variable has the value of oldState when word is 0.
- * In this case, the escape sequense should not be copied to dst
+ * In this case, the escape sequence should not be copied to dst
* because the current character set is not changed.
*/
@@ -3408,8 +3532,7 @@ EscapeFromUtfProc(
result = TCL_CONVERT_NOSPACE;
break;
}
- memcpy(dst, subTablePtr->sequence,
- (size_t) subTablePtr->sequenceLen);
+ memcpy(dst, subTablePtr->sequence, subTablePtr->sequenceLen);
dst += subTablePtr->sequenceLen;
}
}
@@ -3452,7 +3575,7 @@ EscapeFromUtfProc(
memcpy(dst, dataPtr->subTables[0].sequence, len);
dst += len;
}
- memcpy(dst, dataPtr->final, (size_t) dataPtr->finalLen);
+ memcpy(dst, dataPtr->final, dataPtr->finalLen);
dst += dataPtr->finalLen;
state &= ~TCL_ENCODING_END;
}
@@ -3514,7 +3637,7 @@ EscapeFreeProc(
subTablePtr++;
}
}
- ckfree(dataPtr);
+ Tcl_Free(dataPtr);
}
/*
@@ -3615,11 +3738,11 @@ unilen(
static void
InitializeEncodingSearchPath(
char **valuePtr,
- int *lengthPtr,
+ size_t *lengthPtr,
Tcl_Encoding *encodingPtr)
{
const char *bytes;
- int i, numDirs, numBytes;
+ int i, numDirs;
Tcl_Obj *libPathObj, *encodingObj, *searchPathObj;
TclNewLiteralStringObj(encodingObj, "encoding");
@@ -3649,11 +3772,9 @@ InitializeEncodingSearchPath(
if (*encodingPtr) {
((Encoding *)(*encodingPtr))->refCount++;
}
- bytes = Tcl_GetStringFromObj(searchPathObj, &numBytes);
-
- *lengthPtr = numBytes;
- *valuePtr = ckalloc(numBytes + 1);
- memcpy(*valuePtr, bytes, (size_t) numBytes + 1);
+ bytes = TclGetStringFromObj(searchPathObj, lengthPtr);
+ *valuePtr = Tcl_Alloc(*lengthPtr + 1);
+ memcpy(*valuePtr, bytes, *lengthPtr + 1);
Tcl_DecrRefCount(searchPathObj);
}