diff options
Diffstat (limited to 'generic/tclEncoding.c')
-rw-r--r-- | generic/tclEncoding.c | 587 |
1 files changed, 354 insertions, 233 deletions
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 88f4d3a..387a2a6 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -18,7 +18,7 @@ typedef size_t (LengthProc)(const char *src); * convert between various character sets and UTF-8. */ -typedef struct Encoding { +typedef struct { char *name; /* Name of encoding. Malloced because (1) hash * table entry that owns this encoding may be * freed prior to this encoding being freed, @@ -46,7 +46,7 @@ typedef struct Encoding { * nullSize is 2, this is a function that * returns the number of bytes in a 0x0000 * terminated string. */ - int refCount; /* Number of uses of this structure. */ + size_t refCount; /* Number of uses of this structure. */ Tcl_HashEntry *hPtr; /* Hash table entry that owns this encoding. */ } Encoding; @@ -57,7 +57,7 @@ typedef struct Encoding { * encoding. */ -typedef struct TableEncodingData { +typedef struct { int fallback; /* Character (in this encoding) to substitute * when this encoding cannot represent a UTF-8 * character. */ @@ -91,7 +91,7 @@ typedef struct TableEncodingData { * for switching character sets. */ -typedef struct EscapeSubTable { +typedef struct { unsigned sequenceLen; /* Length of following string. */ char sequence[16]; /* Escape code that marks this encoding. */ char name[32]; /* Name for encoding. */ @@ -100,7 +100,7 @@ typedef struct EscapeSubTable { * yet. */ } EscapeSubTable; -typedef struct EscapeEncodingData { +typedef struct { int fallback; /* Character (in this encoding) to substitute * when this encoding cannot represent a UTF-8 * character. */ @@ -234,12 +234,17 @@ static int TableToUtfProc(ClientData clientData, const char *src, char *dst, int dstLen, int *srcReadPtr, int *dstWrotePtr, int *dstCharsPtr); static size_t unilen(const char *src); -static int UnicodeToUtfProc(ClientData clientData, +static int Utf16ToUtfProc(ClientData clientData, const char *src, int srcLen, int flags, Tcl_EncodingState *statePtr, char *dst, int dstLen, int *srcReadPtr, int *dstWrotePtr, int *dstCharsPtr); -static int UtfToUnicodeProc(ClientData clientData, +static int UtfToUtf16Proc(ClientData clientData, + const char *src, int srcLen, int flags, + Tcl_EncodingState *statePtr, char *dst, int dstLen, + int *srcReadPtr, int *dstWrotePtr, + int *dstCharsPtr); +static int UtfToUcs2Proc(ClientData clientData, const char *src, int srcLen, int flags, Tcl_EncodingState *statePtr, char *dst, int dstLen, int *srcReadPtr, int *dstWrotePtr, @@ -279,6 +284,21 @@ static int Iso88591ToUtfProc(ClientData clientData, static const Tcl_ObjType encodingType = { "encoding", FreeEncodingIntRep, DupEncodingIntRep, NULL, NULL }; +#define EncodingSetIntRep(objPtr, encoding) \ + do { \ + Tcl_ObjIntRep ir; \ + ir.twoPtrValue.ptr1 = (encoding); \ + ir.twoPtrValue.ptr2 = NULL; \ + Tcl_StoreIntRep((objPtr), &encodingType, &ir); \ + } while (0) + +#define EncodingGetIntRep(objPtr, encoding) \ + do { \ + const Tcl_ObjIntRep *irPtr; \ + irPtr = TclFetchIntRep ((objPtr), &encodingType); \ + (encoding) = irPtr ? irPtr->twoPtrValue.ptr1 : NULL; \ + } while (0) + /* *---------------------------------------------------------------------- @@ -305,17 +325,16 @@ Tcl_GetEncodingFromObj( Tcl_Obj *objPtr, Tcl_Encoding *encodingPtr) { - const char *name = Tcl_GetString(objPtr); - - if (objPtr->typePtr != &encodingType) { - Tcl_Encoding encoding = Tcl_GetEncoding(interp, name); + Tcl_Encoding encoding; + const char *name = TclGetString(objPtr); + EncodingGetIntRep(objPtr, encoding); + if (encoding == NULL) { + encoding = Tcl_GetEncoding(interp, name); if (encoding == NULL) { return TCL_ERROR; } - TclFreeIntRep(objPtr); - objPtr->internalRep.twoPtrValue.ptr1 = encoding; - objPtr->typePtr = &encodingType; + EncodingSetIntRep(objPtr, encoding); } *encodingPtr = Tcl_GetEncoding(NULL, name); return TCL_OK; @@ -335,8 +354,10 @@ static void FreeEncodingIntRep( Tcl_Obj *objPtr) { - Tcl_FreeEncoding(objPtr->internalRep.twoPtrValue.ptr1); - objPtr->typePtr = NULL; + Tcl_Encoding encoding; + + EncodingGetIntRep(objPtr, encoding); + Tcl_FreeEncoding(encoding); } /* @@ -354,7 +375,8 @@ DupEncodingIntRep( Tcl_Obj *srcPtr, Tcl_Obj *dupPtr) { - dupPtr->internalRep.twoPtrValue.ptr1 = Tcl_GetEncoding(NULL, srcPtr->bytes); + Tcl_Encoding encoding = Tcl_GetEncoding(NULL, TclGetString(srcPtr)); + EncodingSetIntRep(dupPtr, encoding); } /* @@ -547,11 +569,16 @@ TclInitEncodingSubsystem(void) TableEncodingData *dataPtr; unsigned size; unsigned short i; + union { + char c; + short s; + } isLe; if (encodingsInitialized) { return; } + isLe.s = 1; Tcl_MutexLock(&encodingMutex); Tcl_InitHashTable(&encodingTable, TCL_STRING_KEYS); Tcl_MutexUnlock(&encodingMutex); @@ -562,7 +589,7 @@ TclInitEncodingSubsystem(void) * formed UTF-8 into a properly formed stream. */ - type.encodingName = "identity"; + type.encodingName = NULL; type.toUtfProc = BinaryProc; type.fromUtfProc = BinaryProc; type.freeProc = NULL; @@ -578,14 +605,39 @@ TclInitEncodingSubsystem(void) type.clientData = NULL; Tcl_CreateEncoding(&type); - type.encodingName = "unicode"; - type.toUtfProc = UnicodeToUtfProc; - type.fromUtfProc = UtfToUnicodeProc; + type.toUtfProc = Utf16ToUtfProc; + type.fromUtfProc = UtfToUcs2Proc; type.freeProc = NULL; type.nullSize = 2; - type.clientData = NULL; + type.encodingName = "ucs-2le"; + type.clientData = INT2PTR(1); + Tcl_CreateEncoding(&type); + type.encodingName = "ucs-2be"; + type.clientData = INT2PTR(0); + Tcl_CreateEncoding(&type); + type.encodingName = "ucs-2"; + type.clientData = INT2PTR(isLe.c); Tcl_CreateEncoding(&type); + type.toUtfProc = Utf16ToUtfProc; + type.fromUtfProc = UtfToUtf16Proc; + type.freeProc = NULL; + type.nullSize = 2; + type.encodingName = "utf-16le"; + type.clientData = INT2PTR(1);; + Tcl_CreateEncoding(&type); + type.encodingName = "utf-16be"; + type.clientData = INT2PTR(0); + Tcl_CreateEncoding(&type); + type.encodingName = "utf-16"; + type.clientData = INT2PTR(isLe.c);; + Tcl_CreateEncoding(&type); + +#ifndef TCL_NO_DEPRECATED + type.encodingName = "unicode"; + Tcl_CreateEncoding(&type); +#endif + /* * Need the iso8859-1 encoding in order to process binary data, so force * it to always be embedded. Note that this encoding *must* be a proper @@ -593,14 +645,14 @@ TclInitEncodingSubsystem(void) * code to duplicate the structure of a table encoding here. */ - dataPtr = ckalloc(sizeof(TableEncodingData)); + dataPtr = Tcl_Alloc(sizeof(TableEncodingData)); memset(dataPtr, 0, sizeof(TableEncodingData)); dataPtr->fallback = '?'; size = 256*(sizeof(unsigned short *) + sizeof(unsigned short)); - dataPtr->toUnicode = ckalloc(size); + dataPtr->toUnicode = Tcl_Alloc(size); memset(dataPtr->toUnicode, 0, size); - dataPtr->fromUnicode = ckalloc(size); + dataPtr->fromUnicode = Tcl_Alloc(size); memset(dataPtr->fromUnicode, 0, size); dataPtr->toUnicode[0] = (unsigned short *) (dataPtr->toUnicode + 256); @@ -677,68 +729,6 @@ TclFinalizeEncodingSubsystem(void) /* *------------------------------------------------------------------------- * - * Tcl_GetDefaultEncodingDir -- - * - * Legacy public interface to retrieve first directory in the encoding - * searchPath. - * - * Results: - * The directory pathname, as a string, or NULL for an empty encoding - * search path. - * - * Side effects: - * None. - * - *------------------------------------------------------------------------- - */ - -const char * -Tcl_GetDefaultEncodingDir(void) -{ - int numDirs; - Tcl_Obj *first, *searchPath = Tcl_GetEncodingSearchPath(); - - Tcl_ListObjLength(NULL, searchPath, &numDirs); - if (numDirs == 0) { - return NULL; - } - Tcl_ListObjIndex(NULL, searchPath, 0, &first); - - return Tcl_GetString(first); -} - -/* - *------------------------------------------------------------------------- - * - * Tcl_SetDefaultEncodingDir -- - * - * Legacy public interface to set the first directory in the encoding - * search path. - * - * Results: - * None. - * - * Side effects: - * Modifies the encoding search path. - * - *------------------------------------------------------------------------- - */ - -void -Tcl_SetDefaultEncodingDir( - const char *path) -{ - Tcl_Obj *searchPath = Tcl_GetEncodingSearchPath(); - Tcl_Obj *directory = Tcl_NewStringObj(path, -1); - - searchPath = Tcl_DuplicateObj(searchPath); - Tcl_ListObjReplace(NULL, searchPath, 0, 0, 1, &directory); - Tcl_SetEncodingSearchPath(searchPath); -} - -/* - *------------------------------------------------------------------------- - * * Tcl_GetEncoding -- * * Given the name of a encoding, find the corresponding Tcl_Encoding @@ -843,19 +833,17 @@ FreeEncoding( if (encodingPtr == NULL) { return; } - if (encodingPtr->refCount<=0) { - Tcl_Panic("FreeEncoding: refcount problem !!!"); - } - encodingPtr->refCount--; - if (encodingPtr->refCount == 0) { + if (encodingPtr->refCount-- <= 1) { if (encodingPtr->freeProc != NULL) { encodingPtr->freeProc(encodingPtr->clientData); } if (encodingPtr->hPtr != NULL) { Tcl_DeleteHashEntry(encodingPtr->hPtr); } - ckfree(encodingPtr->name); - ckfree(encodingPtr); + if (encodingPtr->name) { + Tcl_Free(encodingPtr->name); + } + Tcl_Free(encodingPtr); } } @@ -976,7 +964,7 @@ Tcl_GetEncodingNames( * Side effects: * The reference count of the new system encoding is incremented. The * reference count of the old system encoding is decremented and it may - * be freed. + * be freed. All VFS cached information is invalidated. * *------------------------------------------------------------------------ */ @@ -1007,6 +995,7 @@ Tcl_SetSystemEncoding( FreeEncoding(systemEncoding); systemEncoding = encoding; Tcl_MutexUnlock(&encodingMutex); + Tcl_FSMountsChanged(NULL); return TCL_OK; } @@ -1042,9 +1031,24 @@ Tcl_CreateEncoding( const Tcl_EncodingType *typePtr) /* The encoding type. */ { + Encoding *encodingPtr = Tcl_Alloc(sizeof(Encoding)); + encodingPtr->name = NULL; + encodingPtr->toUtfProc = typePtr->toUtfProc; + encodingPtr->fromUtfProc = typePtr->fromUtfProc; + encodingPtr->freeProc = typePtr->freeProc; + encodingPtr->nullSize = typePtr->nullSize; + encodingPtr->clientData = typePtr->clientData; + if (typePtr->nullSize == 1) { + encodingPtr->lengthProc = (LengthProc *) strlen; + } else { + encodingPtr->lengthProc = (LengthProc *) unilen; + } + encodingPtr->refCount = 1; + encodingPtr->hPtr = NULL; + + if (typePtr->encodingName) { Tcl_HashEntry *hPtr; int isNew; - Encoding *encodingPtr; char *name; Tcl_MutexLock(&encodingMutex); @@ -1055,30 +1059,17 @@ Tcl_CreateEncoding( * reference goes away. */ - encodingPtr = Tcl_GetHashValue(hPtr); - encodingPtr->hPtr = NULL; + Encoding *replaceMe = Tcl_GetHashValue(hPtr); + replaceMe->hPtr = NULL; } - name = ckalloc(strlen(typePtr->encodingName) + 1); - - encodingPtr = ckalloc(sizeof(Encoding)); + name = Tcl_Alloc(strlen(typePtr->encodingName) + 1); encodingPtr->name = strcpy(name, typePtr->encodingName); - encodingPtr->toUtfProc = typePtr->toUtfProc; - encodingPtr->fromUtfProc = typePtr->fromUtfProc; - encodingPtr->freeProc = typePtr->freeProc; - encodingPtr->nullSize = typePtr->nullSize; - encodingPtr->clientData = typePtr->clientData; - if (typePtr->nullSize == 1) { - encodingPtr->lengthProc = (LengthProc *) strlen; - } else { - encodingPtr->lengthProc = (LengthProc *) unilen; - } - encodingPtr->refCount = 1; encodingPtr->hPtr = hPtr; Tcl_SetHashValue(hPtr, encodingPtr); Tcl_MutexUnlock(&encodingMutex); - + } return (Tcl_Encoding) encodingPtr; } @@ -1108,7 +1099,7 @@ Tcl_ExternalToUtfDString( Tcl_Encoding encoding, /* The encoding for the source string, or NULL * for the default system encoding. */ const char *src, /* Source string in specified encoding. */ - int srcLen, /* Source string length in bytes, or < 0 for + size_t srcLen, /* Source string length in bytes, or -1 for * encoding-specific string length. */ Tcl_DString *dstPtr) /* Uninitialized or free DString in which the * converted string is stored. */ @@ -1116,7 +1107,8 @@ Tcl_ExternalToUtfDString( char *dst; Tcl_EncodingState state; const Encoding *encodingPtr; - int flags, dstLen, result, soFar, srcRead, dstWrote, dstChars; + int flags, result, soFar, srcRead, dstWrote, dstChars; + size_t dstLen; Tcl_DStringInit(dstPtr); dst = Tcl_DStringValue(dstPtr); @@ -1129,7 +1121,7 @@ Tcl_ExternalToUtfDString( if (src == NULL) { srcLen = 0; - } else if (srcLen < 0) { + } else if (srcLen == TCL_AUTO_LENGTH) { srcLen = encodingPtr->lengthProc(src); } @@ -1181,8 +1173,8 @@ Tcl_ExternalToUtf( Tcl_Encoding encoding, /* The encoding for the source string, or NULL * for the default system encoding. */ const char *src, /* Source string in specified encoding. */ - int srcLen, /* Source string length in bytes, or < 0 for - * encoding-specific string length. */ + size_t srcLen, /* Source string length in bytes, or -1 + * for encoding-specific string length. */ int flags, /* Conversion control flags. */ Tcl_EncodingState *statePtr,/* Place for conversion routine to store state * information used during a piecewise @@ -1191,7 +1183,7 @@ Tcl_ExternalToUtf( * routine under control of flags argument. */ char *dst, /* Output buffer in which converted string is * stored. */ - int dstLen, /* The maximum length of output buffer in + size_t dstLen, /* The maximum length of output buffer in * bytes. */ int *srcReadPtr, /* Filled with the number of bytes from the * source string that were converted. This may @@ -1219,7 +1211,7 @@ Tcl_ExternalToUtf( if (src == NULL) { srcLen = 0; - } else if (srcLen < 0) { + } else if (srcLen == TCL_AUTO_LENGTH) { srcLen = encodingPtr->lengthProc(src); } if (statePtr == NULL) { @@ -1259,7 +1251,7 @@ Tcl_ExternalToUtf( if (*dstCharsPtr <= maxChars) { break; } - dstLen = Tcl_UtfAtIndex(dst, maxChars) - 1 - dst + TCL_UTF_MAX; + dstLen = Tcl_UtfAtIndex(dst, maxChars) - dst + (TCL_UTF_MAX - 1); flags = savedFlags; *statePtr = savedState; } while (1); @@ -1298,7 +1290,7 @@ Tcl_UtfToExternalDString( Tcl_Encoding encoding, /* The encoding for the converted string, or * NULL for the default system encoding. */ const char *src, /* Source string in UTF-8. */ - int srcLen, /* Source string length in bytes, or < 0 for + size_t srcLen, /* Source string length in bytes, or -1 for * strlen(). */ Tcl_DString *dstPtr) /* Uninitialized or free DString in which the * converted string is stored. */ @@ -1306,7 +1298,8 @@ Tcl_UtfToExternalDString( char *dst; Tcl_EncodingState state; const Encoding *encodingPtr; - int flags, dstLen, result, soFar, srcRead, dstWrote, dstChars; + int flags, result, soFar, srcRead, dstWrote, dstChars; + size_t dstLen; Tcl_DStringInit(dstPtr); dst = Tcl_DStringValue(dstPtr); @@ -1319,7 +1312,7 @@ Tcl_UtfToExternalDString( if (src == NULL) { srcLen = 0; - } else if (srcLen < 0) { + } else if (srcLen == TCL_AUTO_LENGTH) { srcLen = strlen(src); } flags = TCL_ENCODING_START | TCL_ENCODING_END; @@ -1373,8 +1366,8 @@ Tcl_UtfToExternal( Tcl_Encoding encoding, /* The encoding for the converted string, or * NULL for the default system encoding. */ const char *src, /* Source string in UTF-8. */ - int srcLen, /* Source string length in bytes, or < 0 for - * strlen(). */ + size_t srcLen, /* Source string length in bytes, or -1 + * for strlen(). */ int flags, /* Conversion control flags. */ Tcl_EncodingState *statePtr,/* Place for conversion routine to store state * information used during a piecewise @@ -1383,7 +1376,7 @@ Tcl_UtfToExternal( * routine under control of flags argument. */ char *dst, /* Output buffer in which converted string * is stored. */ - int dstLen, /* The maximum length of output buffer in + size_t dstLen, /* The maximum length of output buffer in * bytes. */ int *srcReadPtr, /* Filled with the number of bytes from the * source string that were converted. This may @@ -1408,7 +1401,7 @@ Tcl_UtfToExternal( if (src == NULL) { srcLen = 0; - } else if (srcLen < 0) { + } else if (srcLen == TCL_AUTO_LENGTH) { srcLen = strlen(src); } if (statePtr == NULL) { @@ -1440,10 +1433,10 @@ Tcl_UtfToExternal( /* *--------------------------------------------------------------------------- * - * Tcl_InitSubsystems/Tcl_FindExecutable -- + * Tcl_FindExecutable -- * - * This function initializes everything needed for the Tcl library - * to be able to operate. + * This function computes the absolute path name of the current + * application, given its argv[0] value. * * Results: * None. @@ -1454,32 +1447,16 @@ Tcl_UtfToExternal( * *--------------------------------------------------------------------------- */ -MODULE_SCOPE const TclStubs tclStubs; - -static const struct { - const TclStubs *stubs; - const char version[12]; -} stubInfo = { - &tclStubs, TCL_PATCH_LEVEL -}; - -const char * -Tcl_InitSubsystems(TCL_NORETURN1 Tcl_PanicProc *panicProc) -{ - Tcl_SetPanicProc(panicProc); - TclInitSubsystems(); - return stubInfo.version; -} - #undef Tcl_FindExecutable -void +const char * Tcl_FindExecutable( const char *argv0) /* The value of the application's argv[0] * (native). */ { - TclInitSubsystems(); + const char *version = TclInitSubsystems(); TclpSetInitialEncodings(); TclpFindExecutable(argv0); + return version; } /* @@ -1534,10 +1511,10 @@ OpenEncodingFileChannel( } } if (!verified) { - const char *dirString = Tcl_GetString(directory); + const char *dirString = TclGetString(directory); for (i=0; i<numDirs && !verified; i++) { - if (strcmp(dirString, Tcl_GetString(dir[i])) == 0) { + if (strcmp(dirString, TclGetString(dir[i])) == 0) { verified = 1; } } @@ -1736,7 +1713,9 @@ LoadTableEncoding( }; Tcl_DStringInit(&lineString); - Tcl_Gets(chan, &lineString); + if (Tcl_Gets(chan, &lineString) == TCL_IO_FAILURE) { + return NULL; + } line = Tcl_DStringValue(&lineString); fallback = (int) strtol(line, &line, 16); @@ -1755,7 +1734,7 @@ LoadTableEncoding( #undef PAGESIZE #define PAGESIZE (256 * sizeof(unsigned short)) - dataPtr = ckalloc(sizeof(TableEncodingData)); + dataPtr = Tcl_Alloc(sizeof(TableEncodingData)); memset(dataPtr, 0, sizeof(TableEncodingData)); dataPtr->fallback = fallback; @@ -1767,7 +1746,7 @@ LoadTableEncoding( */ size = 256 * sizeof(unsigned short *) + numPages * PAGESIZE; - dataPtr->toUnicode = ckalloc(size); + dataPtr->toUnicode = Tcl_Alloc(size); memset(dataPtr->toUnicode, 0, size); pageMemPtr = (unsigned short *) (dataPtr->toUnicode + 256); @@ -1776,9 +1755,12 @@ LoadTableEncoding( for (i = 0; i < numPages; i++) { int ch; const char *p; + size_t expected = 3 + 16 * (16 * 4 + 1); - Tcl_ReadChars(chan, objPtr, 3 + 16 * (16 * 4 + 1), 0); - p = Tcl_GetString(objPtr); + if (Tcl_ReadChars(chan, objPtr, expected, 0) != expected) { + return NULL; + } + p = TclGetString(objPtr); hi = (staticHex[UCHAR(p[0])] << 4) + staticHex[UCHAR(p[1])]; dataPtr->toUnicode[hi] = pageMemPtr; p += 2; @@ -1825,7 +1807,7 @@ LoadTableEncoding( } } size = 256 * sizeof(unsigned short *) + numPages * PAGESIZE; - dataPtr->fromUnicode = ckalloc(size); + dataPtr->fromUnicode = Tcl_Alloc(size); memset(dataPtr->fromUnicode, 0, size); pageMemPtr = (unsigned short *) (dataPtr->fromUnicode + 256); @@ -1921,7 +1903,7 @@ LoadTableEncoding( */ for (TclDStringClear(&lineString); - (len = Tcl_Gets(chan, &lineString)) >= 0; + (len = Tcl_Gets(chan, &lineString)) != -1; TclDStringClear(&lineString)) { const unsigned char *p; int to, from; @@ -2015,7 +1997,7 @@ LoadEscapeEncoding( Tcl_DString lineString; Tcl_DStringInit(&lineString); - if (Tcl_Gets(chan, &lineString) < 0) { + if (Tcl_Gets(chan, &lineString) == TCL_IO_FAILURE) { break; } line = Tcl_DStringValue(&lineString); @@ -2057,21 +2039,21 @@ LoadEscapeEncoding( Tcl_DStringAppend(&escapeData, (char *) &est, sizeof(est)); } } - ckfree(argv); + Tcl_Free((void *)argv); Tcl_DStringFree(&lineString); } size = sizeof(EscapeEncodingData) - sizeof(EscapeSubTable) + Tcl_DStringLength(&escapeData); - dataPtr = ckalloc(size); + dataPtr = Tcl_Alloc(size); dataPtr->initLen = strlen(init); - memcpy(dataPtr->init, init, (unsigned) dataPtr->initLen + 1); + memcpy(dataPtr->init, init, dataPtr->initLen + 1); dataPtr->finalLen = strlen(final); - memcpy(dataPtr->final, final, (unsigned) dataPtr->finalLen + 1); + memcpy(dataPtr->final, final, dataPtr->finalLen + 1); dataPtr->numSubTables = Tcl_DStringLength(&escapeData) / sizeof(EscapeSubTable); memcpy(dataPtr->subTables, Tcl_DStringValue(&escapeData), - (size_t) Tcl_DStringLength(&escapeData)); + Tcl_DStringLength(&escapeData)); Tcl_DStringFree(&escapeData); memset(dataPtr->prefixBytes, 0, sizeof(dataPtr->prefixBytes)); @@ -2159,7 +2141,7 @@ BinaryProc( *srcReadPtr = srcLen; *dstWrotePtr = srcLen; *dstCharsPtr = srcLen; - memcpy(dst, src, (size_t) srcLen); + memcpy(dst, src, srcLen); return result; } @@ -2312,8 +2294,11 @@ UtfToUtfProc( const char *srcStart, *srcEnd, *srcClose; const char *dstStart, *dstEnd; int result, numChars, charLimit = INT_MAX; - Tcl_UniChar ch; + Tcl_UniChar *chPtr = (Tcl_UniChar *) statePtr; + if (flags & TCL_ENCODING_START) { + *statePtr = 0; + } result = TCL_OK; srcStart = src; @@ -2345,7 +2330,7 @@ UtfToUtfProc( } if (UCHAR(*src) < 0x80 && !(UCHAR(*src) == 0 && pureNullMode == 0)) { /* - * Copy 7bit chatacters, but skip null-bytes when we are in input + * Copy 7bit characters, but skip null-bytes when we are in input * mode, so that they get converted to 0xc080. */ @@ -2360,17 +2345,24 @@ UtfToUtfProc( src += 2; } else if (!Tcl_UtfCharComplete(src, srcEnd - src)) { /* - * Always check before using Tcl_UtfToUniChar. Not doing can so - * cause it run beyond the endof the buffer! If we happen such an - * incomplete char its byts are made to represent themselves. + * Always check before using TclUtfToUniChar. Not doing can so + * cause it run beyond the end of the buffer! If we happen such an + * incomplete char its bytes are made to represent themselves. */ - ch = (unsigned char) *src; + *chPtr = (unsigned char) *src; src += 1; - dst += Tcl_UniCharToUtf(ch, dst); + dst += Tcl_UniCharToUtf(*chPtr, dst); } else { - src += Tcl_UtfToUniChar(src, &ch); - dst += Tcl_UniCharToUtf(ch, dst); + int len = TclUtfToUniChar(src, chPtr); + src += len; + dst += Tcl_UniCharToUtf(*chPtr, dst); +#if TCL_UTF_MAX <= 4 + if ((*chPtr >= 0xD800) && (len < 3)) { + src += TclUtfToUniChar(src + len, chPtr); + dst += Tcl_UniCharToUtf(*chPtr, dst); + } +#endif } } @@ -2383,9 +2375,9 @@ UtfToUtfProc( /* *------------------------------------------------------------------------- * - * UnicodeToUtfProc -- + * Utf16ToUtfProc -- * - * Convert from Unicode to UTF-8. + * Convert from UTF-16 to UTF-8. * * Results: * Returns TCL_OK if conversion was successful. @@ -2397,8 +2389,8 @@ UtfToUtfProc( */ static int -UnicodeToUtfProc( - ClientData clientData, /* Not used. */ +Utf16ToUtfProc( + ClientData clientData, /* != NULL means LE, == NUL means BE */ const char *src, /* Source string in Unicode. */ int srcLen, /* Source string length in bytes. */ int flags, /* Conversion control flags. */ @@ -2426,16 +2418,16 @@ UnicodeToUtfProc( const char *srcStart, *srcEnd; const char *dstEnd, *dstStart; int result, numChars, charLimit = INT_MAX; - Tcl_UniChar ch; + unsigned short ch; if (flags & TCL_ENCODING_CHAR_LIMIT) { charLimit = *dstCharsPtr; } result = TCL_OK; - if ((srcLen % sizeof(Tcl_UniChar)) != 0) { + if ((srcLen % sizeof(unsigned short)) != 0) { result = TCL_CONVERT_MULTIBYTE; - srcLen /= sizeof(Tcl_UniChar); - srcLen *= sizeof(Tcl_UniChar); + srcLen /= sizeof(unsigned short); + srcLen *= sizeof(unsigned short); } srcStart = src; @@ -2450,18 +2442,21 @@ UnicodeToUtfProc( break; } + if (clientData) { + ch = (src[1] & 0xFF) << 8 | (src[0] & 0xFF); + } else { + ch = (src[0] & 0xFF) << 8 | (src[1] & 0xFF); + } /* * Special case for 1-byte utf chars for speed. Make sure we work with - * Tcl_UniChar-size data. + * unsigned short-size data. */ - - ch = *(Tcl_UniChar *)src; if (ch && ch < 0x80) { *dst++ = (ch & 0xFF); } else { dst += Tcl_UniCharToUtf(ch, dst); } - src += sizeof(Tcl_UniChar); + src += sizeof(unsigned short); } *srcReadPtr = src - srcStart; @@ -2473,9 +2468,9 @@ UnicodeToUtfProc( /* *------------------------------------------------------------------------- * - * UtfToUnicodeProc -- + * UtfToUtf16Proc -- * - * Convert from UTF-8 to Unicode. + * Convert from UTF-8 to UTF-16. * * Results: * Returns TCL_OK if conversion was successful. @@ -2487,9 +2482,8 @@ UnicodeToUtfProc( */ static int -UtfToUnicodeProc( - ClientData clientData, /* TableEncodingData that specifies - * encoding. */ +UtfToUtf16Proc( + ClientData clientData, /* != NULL means LE, == NUL means BE */ const char *src, /* Source string in UTF-8. */ int srcLen, /* Source string length in bytes. */ int flags, /* Conversion control flags. */ @@ -2516,8 +2510,11 @@ UtfToUnicodeProc( { const char *srcStart, *srcEnd, *srcClose, *dstStart, *dstEnd; int result, numChars; - Tcl_UniChar ch; + Tcl_UniChar *chPtr = (Tcl_UniChar *) statePtr; + if (flags & TCL_ENCODING_START) { + *statePtr = 0; + } srcStart = src; srcEnd = src + srcLen; srcClose = srcEnd; @@ -2543,34 +2540,44 @@ UtfToUnicodeProc( result = TCL_CONVERT_NOSPACE; break; } - src += TclUtfToUniChar(src, &ch); + src += TclUtfToUniChar(src, chPtr); /* * Need to handle this in a way that won't cause misalignment by * casting dst to a Tcl_UniChar. [Bug 1122671] */ -#ifdef WORDS_BIGENDIAN + if (clientData) { #if TCL_UTF_MAX > 4 - *dst++ = (ch >> 24); - *dst++ = ((ch >> 16) & 0xFF); - *dst++ = ((ch >> 8) & 0xFF); - *dst++ = (ch & 0xFF); + if (*chPtr <= 0xFFFF) { + *dst++ = (*chPtr & 0xFF); + *dst++ = (*chPtr >> 8); + } else { + *dst++ = (((*chPtr - 0x10000) >> 10) & 0xFF); + *dst++ = (((*chPtr - 0x10000) >> 18) & 0x3) | 0xD8; + *dst++ = (*chPtr & 0xFF); + *dst++ = ((*chPtr & 0x3) >> 8) | 0xDC; + } #else - *dst++ = (ch >> 8); - *dst++ = (ch & 0xFF); + *dst++ = (*chPtr & 0xFF); + *dst++ = (*chPtr >> 8); #endif -#else + } else { #if TCL_UTF_MAX > 4 - *dst++ = (ch & 0xFF); - *dst++ = ((ch >> 8) & 0xFF); - *dst++ = ((ch >> 16) & 0xFF); - *dst++ = (ch >> 24); + if (*chPtr <= 0xFFFF) { + *dst++ = (*chPtr >> 8); + *dst++ = (*chPtr & 0xFF); + } else { + *dst++ = ((*chPtr & 0x3) >> 8) | 0xDC; + *dst++ = (*chPtr & 0xFF); + *dst++ = (((*chPtr - 0x10000) >> 18) & 0x3) | 0xD8; + *dst++ = (((*chPtr - 0x10000) >> 10) & 0xFF); + } #else - *dst++ = (ch & 0xFF); - *dst++ = (ch >> 8); -#endif + *dst++ = (*chPtr >> 8); + *dst++ = (*chPtr & 0xFF); #endif + } } *srcReadPtr = src - srcStart; *dstWrotePtr = dst - dstStart; @@ -2581,6 +2588,113 @@ UtfToUnicodeProc( /* *------------------------------------------------------------------------- * + * UtfToUcs2Proc -- + * + * Convert from UTF-8 to UCS-2. + * + * Results: + * Returns TCL_OK if conversion was successful. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------- + */ + +static int +UtfToUcs2Proc( + ClientData clientData, /* != NULL means LE, == NUL means BE */ + const char *src, /* Source string in UTF-8. */ + int srcLen, /* Source string length in bytes. */ + int flags, /* Conversion control flags. */ + Tcl_EncodingState *statePtr,/* Place for conversion routine to store state + * information used during a piecewise + * conversion. Contents of statePtr are + * initialized and/or reset by conversion + * routine under control of flags argument. */ + char *dst, /* Output buffer in which converted string is + * stored. */ + int dstLen, /* The maximum length of output buffer in + * bytes. */ + int *srcReadPtr, /* Filled with the number of bytes from the + * source string that were converted. This may + * be less than the original source length if + * there was a problem converting some source + * characters. */ + int *dstWrotePtr, /* Filled with the number of bytes that were + * stored in the output buffer as a result of + * the conversion. */ + int *dstCharsPtr) /* Filled with the number of characters that + * correspond to the bytes stored in the + * output buffer. */ +{ + const char *srcStart, *srcEnd, *srcClose, *dstStart, *dstEnd; + int result, numChars; +#if TCL_UTF_MAX <= 4 + int len; +#endif + Tcl_UniChar ch = 0; + + srcStart = src; + srcEnd = src + srcLen; + srcClose = srcEnd; + if ((flags & TCL_ENCODING_END) == 0) { + srcClose -= TCL_UTF_MAX; + } + + dstStart = dst; + dstEnd = dst + dstLen - sizeof(Tcl_UniChar); + + result = TCL_OK; + for (numChars = 0; src < srcEnd; numChars++) { + if ((src > srcClose) && (!Tcl_UtfCharComplete(src, srcEnd - src))) { + /* + * If there is more string to follow, this will ensure that the + * last UTF-8 character in the source buffer hasn't been cut off. + */ + + result = TCL_CONVERT_MULTIBYTE; + break; + } + if (dst > dstEnd) { + result = TCL_CONVERT_NOSPACE; + break; + } +#if TCL_UTF_MAX <= 4 + src += (len = TclUtfToUniChar(src, &ch)); + if ((ch >= 0xD800) && (len < 3)) { + src += TclUtfToUniChar(src, &ch); + ch = 0xFFFD; + } +#else + src += TclUtfToUniChar(src, &ch); + if (ch > 0xFFFF) { + ch = 0xFFFD; + } +#endif + + /* + * Need to handle this in a way that won't cause misalignment by + * casting dst to a Tcl_UniChar. [Bug 1122671] + */ + + if (clientData) { + *dst++ = (ch & 0xFF); + *dst++ = (ch >> 8); + } else { + *dst++ = (ch >> 8); + *dst++ = (ch & 0xFF); + } + } + *srcReadPtr = src - srcStart; + *dstWrotePtr = dst - dstStart; + *dstCharsPtr = numChars; + return result; +} + +/* + *------------------------------------------------------------------------- + * * TableToUtfProc -- * * Convert from the encoding specified by the TableEncodingData into @@ -2626,7 +2740,7 @@ TableToUtfProc( const char *srcStart, *srcEnd; const char *dstEnd, *dstStart, *prefixBytes; int result, byte, numChars, charLimit = INT_MAX; - Tcl_UniChar ch; + Tcl_UniChar ch = 0; const unsigned short *const *toUnicode; const unsigned short *pageZero; TableEncodingData *dataPtr = clientData; @@ -2738,7 +2852,7 @@ TableFromUtfProc( { const char *srcStart, *srcEnd, *srcClose; const char *dstStart, *dstEnd, *prefixBytes; - Tcl_UniChar ch; + Tcl_UniChar ch = 0; int result, len, word, numChars; TableEncodingData *dataPtr = clientData; const unsigned short *const *fromUnicode; @@ -2770,7 +2884,7 @@ TableFromUtfProc( } len = TclUtfToUniChar(src, &ch); -#if TCL_UTF_MAX > 3 +#if TCL_UTF_MAX > 4 /* * This prevents a crash condition. More evaluation is required for * full support of int Tcl_UniChar. [Bug 1004065] @@ -2779,6 +2893,10 @@ TableFromUtfProc( if (ch & 0xffff0000) { word = 0; } else +#else + if (!len) { + word = 0; + } else #endif word = fromUnicode[(ch >> 8)][ch & 0xff]; @@ -2872,7 +2990,7 @@ Iso88591ToUtfProc( result = TCL_OK; for (numChars = 0; src < srcEnd && numChars <= charLimit; numChars++) { - Tcl_UniChar ch; + Tcl_UniChar ch = 0; if (dst > dstEnd) { result = TCL_CONVERT_NOSPACE; @@ -2958,7 +3076,7 @@ Iso88591FromUtfProc( dstEnd = dst + dstLen - 1; for (numChars = 0; src < srcEnd; numChars++) { - Tcl_UniChar ch; + Tcl_UniChar ch = 0; int len; if ((src > srcClose) && (!Tcl_UtfCharComplete(src, srcEnd - src))) { @@ -2976,12 +3094,18 @@ Iso88591FromUtfProc( * Check for illegal characters. */ - if (ch > 0xff) { + if (ch > 0xff +#if TCL_UTF_MAX <= 4 + || ((ch >= 0xD800) && (len < 3)) +#endif + ) { if (flags & TCL_ENCODING_STOPONERROR) { result = TCL_CONVERT_UNKNOWN; break; } - +#if TCL_UTF_MAX <= 4 + if ((ch >= 0xD800) && (len < 3)) len = 4; +#endif /* * Plunge on, using '?' as a fallback character. */ @@ -3031,11 +3155,11 @@ TableFreeProc( * Make sure we aren't freeing twice on shutdown. [Bug 219314] */ - ckfree(dataPtr->toUnicode); + Tcl_Free(dataPtr->toUnicode); dataPtr->toUnicode = NULL; - ckfree(dataPtr->fromUnicode); + Tcl_Free(dataPtr->fromUnicode); dataPtr->fromUnicode = NULL; - ckfree(dataPtr); + Tcl_Free(dataPtr); } /* @@ -3330,7 +3454,7 @@ EscapeFromUtfProc( *dstWrotePtr = 0; return TCL_CONVERT_NOSPACE; } - memcpy(dst, dataPtr->init, (size_t)dataPtr->initLen); + memcpy(dst, dataPtr->init, dataPtr->initLen); dst += dataPtr->initLen; } else { state = PTR2INT(*statePtr); @@ -3345,7 +3469,7 @@ EscapeFromUtfProc( for (numChars = 0; src < srcEnd; numChars++) { unsigned len; int word; - Tcl_UniChar ch; + Tcl_UniChar ch = 0; if ((src > srcClose) && (!Tcl_UtfCharComplete(src, srcEnd - src))) { /* @@ -3390,7 +3514,7 @@ EscapeFromUtfProc( /* * The state variable has the value of oldState when word is 0. - * In this case, the escape sequense should not be copied to dst + * In this case, the escape sequence should not be copied to dst * because the current character set is not changed. */ @@ -3408,8 +3532,7 @@ EscapeFromUtfProc( result = TCL_CONVERT_NOSPACE; break; } - memcpy(dst, subTablePtr->sequence, - (size_t) subTablePtr->sequenceLen); + memcpy(dst, subTablePtr->sequence, subTablePtr->sequenceLen); dst += subTablePtr->sequenceLen; } } @@ -3452,7 +3575,7 @@ EscapeFromUtfProc( memcpy(dst, dataPtr->subTables[0].sequence, len); dst += len; } - memcpy(dst, dataPtr->final, (size_t) dataPtr->finalLen); + memcpy(dst, dataPtr->final, dataPtr->finalLen); dst += dataPtr->finalLen; state &= ~TCL_ENCODING_END; } @@ -3514,7 +3637,7 @@ EscapeFreeProc( subTablePtr++; } } - ckfree(dataPtr); + Tcl_Free(dataPtr); } /* @@ -3615,11 +3738,11 @@ unilen( static void InitializeEncodingSearchPath( char **valuePtr, - int *lengthPtr, + size_t *lengthPtr, Tcl_Encoding *encodingPtr) { const char *bytes; - int i, numDirs, numBytes; + int i, numDirs; Tcl_Obj *libPathObj, *encodingObj, *searchPathObj; TclNewLiteralStringObj(encodingObj, "encoding"); @@ -3649,11 +3772,9 @@ InitializeEncodingSearchPath( if (*encodingPtr) { ((Encoding *)(*encodingPtr))->refCount++; } - bytes = Tcl_GetStringFromObj(searchPathObj, &numBytes); - - *lengthPtr = numBytes; - *valuePtr = ckalloc(numBytes + 1); - memcpy(*valuePtr, bytes, (size_t) numBytes + 1); + bytes = TclGetStringFromObj(searchPathObj, lengthPtr); + *valuePtr = Tcl_Alloc(*lengthPtr + 1); + memcpy(*valuePtr, bytes, *lengthPtr + 1); Tcl_DecrRefCount(searchPathObj); } |