diff options
Diffstat (limited to 'generic/tclEncoding.c')
-rw-r--r-- | generic/tclEncoding.c | 283 |
1 files changed, 279 insertions, 4 deletions
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 2548b73..b6dcb8f 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -234,6 +234,9 @@ static int TableToUtfProc(ClientData clientData, const char *src, char *dst, int dstLen, int *srcReadPtr, int *dstWrotePtr, int *dstCharsPtr); static size_t unilen(const char *src); +#if TCL_UTF_MAX > 4 +static size_t unilen4(const char *src); +#endif static int UnicodeToUtfProc(ClientData clientData, const char *src, int srcLen, int flags, Tcl_EncodingState *statePtr, char *dst, int dstLen, @@ -269,6 +272,18 @@ static int Iso88591ToUtfProc(ClientData clientData, Tcl_EncodingState *statePtr, char *dst, int dstLen, int *srcReadPtr, int *dstWrotePtr, int *dstCharsPtr); +#if TCL_UTF_MAX > 4 +static int Utf16ToUtfProc(ClientData clientData, + const char *src, int srcLen, int flags, + Tcl_EncodingState *statePtr, char *dst, int dstLen, + int *srcReadPtr, int *dstWrotePtr, + int *dstCharsPtr); +static int UtfToUtf16Proc(ClientData clientData, + const char *src, int srcLen, int flags, + Tcl_EncodingState *statePtr, char *dst, int dstLen, + int *srcReadPtr, int *dstWrotePtr, + int *dstCharsPtr); +#endif /* * A Tcl_ObjType for holding a cached Tcl_Encoding in the twoPtrValue.ptr1 field @@ -578,13 +593,31 @@ TclInitEncodingSubsystem(void) type.clientData = NULL; Tcl_CreateEncoding(&type); +#if TCL_UTF_MAX > 4 + type.encodingName = "utf-32"; +#else type.encodingName = "unicode"; +#endif type.toUtfProc = UnicodeToUtfProc; type.fromUtfProc = UtfToUnicodeProc; type.freeProc = NULL; +#if TCL_UTF_MAX > 4 + type.nullSize = 4; +#else + type.nullSize = 2; +#endif + type.clientData = NULL; + Tcl_CreateEncoding(&type); + +#if TCL_UTF_MAX > 4 + type.encodingName = "unicode"; + type.toUtfProc = Utf16ToUtfProc; + type.fromUtfProc = UtfToUtf16Proc; + type.freeProc = NULL; type.nullSize = 2; type.clientData = NULL; Tcl_CreateEncoding(&type); +#endif /* * Need the iso8859-1 encoding in order to process binary data, so force @@ 
-1071,6 +1104,10 @@ Tcl_CreateEncoding( encodingPtr->clientData = typePtr->clientData; if (typePtr->nullSize == 1) { encodingPtr->lengthProc = (LengthProc *) strlen; +#if TCL_UTF_MAX > 4 + } else if (typePtr->nullSize == 4) { + encodingPtr->lengthProc = (LengthProc *) unilen4; +#endif } else { encodingPtr->lengthProc = (LengthProc *) unilen; } @@ -1441,10 +1478,10 @@ Tcl_UtfToExternal( /* *--------------------------------------------------------------------------- * - * Tcl_FindExecutable -- + * Tcl_InitSubsystems/Tcl_FindExecutable -- * - * This function computes the absolute path name of the current - * application, given its argv[0] value. + * This function initializes everything needed for the Tcl library + * to be able to operate. * * Results: * None. @@ -1455,6 +1492,23 @@ Tcl_UtfToExternal( * *--------------------------------------------------------------------------- */ +MODULE_SCOPE const TclStubs tclStubs; + +static const struct { + const TclStubs *stubs; + const char version[12]; +} stubInfo = { + &tclStubs, TCL_PATCH_LEVEL +}; + +const char * +Tcl_InitSubsystems(TCL_NORETURN1 Tcl_PanicProc *panicProc) +{ + Tcl_SetPanicProc(panicProc); + TclInitSubsystems(); + return stubInfo.version; +} + #undef Tcl_FindExecutable void Tcl_FindExecutable( @@ -2987,6 +3041,212 @@ Iso88591FromUtfProc( return result; } +#if TCL_UTF_MAX > 4 +/* + *------------------------------------------------------------------------- + * + * Utf16ToUtfProc -- + * + * Convert from UTF-16 to UTF-8. + * + * Results: + * Returns TCL_OK if conversion was successful. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------- + */ + +static int +Utf16ToUtfProc( + ClientData clientData, /* Not used. */ + const char *src, /* Source string in Unicode. */ + int srcLen, /* Source string length in bytes. */ + int flags, /* Conversion control flags. 
*/ + Tcl_EncodingState *statePtr,/* Place for conversion routine to store state + * information used during a piecewise + * conversion. Contents of statePtr are + * initialized and/or reset by conversion + * routine under control of flags argument. */ + char *dst, /* Output buffer in which converted string is + * stored. */ + int dstLen, /* The maximum length of output buffer in + * bytes. */ + int *srcReadPtr, /* Filled with the number of bytes from the + * source string that were converted. This may + * be less than the original source length if + * there was a problem converting some source + * characters. */ + int *dstWrotePtr, /* Filled with the number of bytes that were + * stored in the output buffer as a result of + * the conversion. */ + int *dstCharsPtr) /* Filled with the number of characters that + * correspond to the bytes stored in the + * output buffer. */ +{ + const char *srcStart, *srcEnd; + const char *dstEnd, *dstStart; + int result, numChars, charLimit = INT_MAX; + Tcl_UniChar ch; + + if (flags & TCL_ENCODING_CHAR_LIMIT) { + charLimit = *dstCharsPtr; + } + result = TCL_OK; + if ((srcLen % sizeof(unsigned short)) != 0) { + result = TCL_CONVERT_MULTIBYTE; + srcLen /= sizeof(unsigned short); + srcLen *= sizeof(unsigned short); + } + + srcStart = src; + srcEnd = src + srcLen; + + dstStart = dst; + dstEnd = dst + dstLen - TCL_UTF_MAX; + + for (numChars = 0; src < srcEnd && numChars <= charLimit; numChars++) { + if (dst > dstEnd) { + result = TCL_CONVERT_NOSPACE; + break; + } + ch = *(unsigned short *)src; + if (ch && ch < 0x80) { + *dst++ = (ch & 0xFF); + } else if ((ch >= 0xD800) && (ch <= 0xDBFF)) { + Tcl_UniChar ch2 = *(unsigned short *)(src + sizeof(unsigned short)); + + if ((ch2 >= 0xDC00) && (ch2 <= 0xDFFF)) { + ch = ((ch & 0x3FF) << 10) + 0x10000 + (ch2 & 0x3FF); + src += sizeof(unsigned short); + } + dst += Tcl_UniCharToUtf(ch, dst); + } else { + dst += Tcl_UniCharToUtf(ch, dst); + } + src += sizeof(unsigned short); + } + + *srcReadPtr = src 
- srcStart; + *dstWrotePtr = dst - dstStart; + *dstCharsPtr = numChars; + return result; +} +#endif + +#if TCL_UTF_MAX > 4 +/* + *------------------------------------------------------------------------- + * + * UtfToUtf16Proc -- + * + * Convert from UTF-8 to UTF-16. + * + * Results: + * Returns TCL_OK if conversion was successful. + * + * Side effects: + * None. + * + *------------------------------------------------------------------------- + */ + +static int +UtfToUtf16Proc( + ClientData clientData, /* TableEncodingData that specifies + * encoding. */ + const char *src, /* Source string in UTF-8. */ + int srcLen, /* Source string length in bytes. */ + int flags, /* Conversion control flags. */ + Tcl_EncodingState *statePtr,/* Place for conversion routine to store state + * information used during a piecewise + * conversion. Contents of statePtr are + * initialized and/or reset by conversion + * routine under control of flags argument. */ + char *dst, /* Output buffer in which converted string is + * stored. */ + int dstLen, /* The maximum length of output buffer in + * bytes. */ + int *srcReadPtr, /* Filled with the number of bytes from the + * source string that were converted. This may + * be less than the original source length if + * there was a problem converting some source + * characters. */ + int *dstWrotePtr, /* Filled with the number of bytes that were + * stored in the output buffer as a result of + * the conversion. */ + int *dstCharsPtr) /* Filled with the number of characters that + * correspond to the bytes stored in the + * output buffer. 
*/ +{ + const char *srcStart, *srcEnd, *srcClose, *dstStart, *dstEnd; + int result, numChars; + Tcl_UniChar ch; + + srcStart = src; + srcEnd = src + srcLen; + srcClose = srcEnd; + if ((flags & TCL_ENCODING_END) == 0) { + srcClose -= TCL_UTF_MAX; + } + + dstStart = dst; + dstEnd = dst + dstLen - 2 * sizeof(unsigned short); + + result = TCL_OK; + for (numChars = 0; src < srcEnd; numChars++) { + if ((src > srcClose) && (!Tcl_UtfCharComplete(src, srcEnd - src))) { + /* + * If there is more string to follow, this will ensure that the + * last UTF-8 character in the source buffer hasn't been cut off. + */ + + result = TCL_CONVERT_MULTIBYTE; + break; + } + if (dst > dstEnd) { + result = TCL_CONVERT_NOSPACE; + break; + } + src += TclUtfToUniChar(src, &ch); + + if (ch > 0x10FFFF) { + ch = 0xFFFD; + } + + /* + * Need to handle this in a way that won't cause misalignment by + * casting dst to a Tcl_UniChar. [Bug 1122671] + */ + + if (ch > 0xFFFF) { + int high = (((ch - 0x10000) >> 10) & 0x3FF) | 0xD800; + + ch = ((ch - 0x10000) & 0x3FF) | 0xDC00; +#ifdef WORDS_BIGENDIAN + *dst++ = ((high >> 8) & 0xFF); + *dst++ = (high & 0xFF); +#else + *dst++ = (high & 0xFF); + *dst++ = ((high >> 8) & 0xFF); +#endif + } +#ifdef WORDS_BIGENDIAN + *dst++ = ((ch >> 8) & 0xFF); + *dst++ = (ch & 0xFF); +#else + *dst++ = (ch & 0xFF); + *dst++ = ((ch >> 8) & 0xFF); +#endif + } + *srcReadPtr = src - srcStart; + *dstWrotePtr = dst - dstStart; + *dstCharsPtr = numChars; + return result; +} +#endif + /* *--------------------------------------------------------------------------- * @@ -3545,7 +3805,7 @@ GetTableEncoding( /* *--------------------------------------------------------------------------- * - * unilen -- + * unilen, unilen4 -- * * A helper function for the Tcl_ExternalToUtf functions. 
#if TCL_UTF_MAX > 4
/*
 *---------------------------------------------------------------------------
 *
 * unilen4 --
 *
 *      Counterpart of strlen for strings made of four-byte units: returns
 *      the number of bytes that precede the first unit whose four bytes
 *      are all zero.
 *
 *      The units are examined byte by byte, so src does not have to be
 *      aligned for 32-bit access.
 *
 * Results:
 *      The byte offset of the four-byte terminator, always a multiple of 4.
 *
 * Side effects:
 *      None.
 *
 *---------------------------------------------------------------------------
 */

static size_t
unilen4(
    const char *src)
{
    const char *p = src;

    while ((p[0] != 0) || (p[1] != 0) || (p[2] != 0) || (p[3] != 0)) {
        p += 4;
    }
    return p - src;
}
#endif