From 5a986f03e84121f484f4dab23b5b362eb0eb2f54 Mon Sep 17 00:00:00 2001 From: hobbs Date: Thu, 5 Oct 2006 21:28:17 +0000 Subject: * unix/tkUnixFont.c (Ucs2beToUtfProc, UtfToUcs2beProc): (TkpFontPkgInit, encodingAliases): Correct alignment issues in encoding conversion. Call ucs-2be "unicode" on big-endian systems. [Bug 1122671] --- ChangeLog | 7 +++++ unix/tkUnixFont.c | 84 ++++++++++++++++++++++++++++++++----------------------- 2 files changed, 56 insertions(+), 35 deletions(-) diff --git a/ChangeLog b/ChangeLog index 6f4839b..d12f4bf 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,10 @@ +2006-10-05 Jeff Hobbs + + * unix/tkUnixFont.c (Ucs2beToUtfProc, UtfToUcs2beProc): + (TkpFontPkgInit, encodingAliases): Correct alignment issues in + encoding conversion. Call ucs-2be "unicode" on big-endian + systems. [Bug 1122671] + 2006-09-27 Pat Thoyts * win/makefile.vc: Support MSVC8 and AMD64 target. Also try using diff --git a/unix/tkUnixFont.c b/unix/tkUnixFont.c index 66f8550..6cecc1d 100644 --- a/unix/tkUnixFont.c +++ b/unix/tkUnixFont.c @@ -9,7 +9,7 @@ * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tkUnixFont.c,v 1.18.2.5 2006/04/25 08:29:36 dkf Exp $ + * RCS: @(#) $Id: tkUnixFont.c,v 1.18.2.6 2006/10/05 21:28:17 hobbs Exp $ */ #include "tkUnixInt.h" @@ -190,7 +190,14 @@ static EncodingAlias encodingAliases[] = { {"tis620", "tis620*"}, {"ksc5601", "ksc5601*"}, {"dingbats", "*dingbats"}, +#ifdef WORDS_BIGENDIAN + {"unicode", "iso10646-1"}, +#else + /* + * ucs-2be is needed if native order isn't BE. + */ {"ucs-2be", "iso10646-1"}, +#endif {NULL, NULL} }; @@ -250,6 +257,7 @@ static void ReleaseSubFont _ANSI_ARGS_((Display *display, SubFont *subFontPtr)); static int SeenName _ANSI_ARGS_((CONST char *name, Tcl_DString *dsPtr)); +#ifndef WORDS_BIGENDIAN static int Ucs2beToUtfProc _ANSI_ARGS_((ClientData clientData, CONST char *src, int srcLen, int flags, Tcl_EncodingState *statePtr, char *dst, int dstLen, @@ -260,6 +268,7 @@ static int UtfToUcs2beProc _ANSI_ARGS_((ClientData clientData, Tcl_EncodingState *statePtr, char *dst, int dstLen, int *srcReadPtr, int *dstWrotePtr, int *dstCharsPtr)); +#endif /* @@ -327,7 +336,7 @@ TkpFontPkgInit(mainPtr) Tcl_EncodingType type; SubFont dummy; int i; - + if (tsdPtr->controlFamily.encoding == NULL) { type.encodingName = "X11ControlChars"; type.toUtfProc = ControlUtfProc; @@ -335,7 +344,7 @@ TkpFontPkgInit(mainPtr) type.freeProc = NULL; type.clientData = NULL; type.nullSize = 0; - + tsdPtr->controlFamily.refCount = 2; tsdPtr->controlFamily.encoding = Tcl_CreateEncoding(&type); tsdPtr->controlFamily.isTwoByteFont = 0; @@ -347,9 +356,10 @@ TkpFontPkgInit(mainPtr) FontMapInsert(&dummy, i + 0x80); } +#ifndef WORDS_BIGENDIAN /* - * UCS-2BE is unicode in big-endian format. - * It is used in iso10646 fonts. + * UCS-2BE is unicode (UCS-2) in big-endian format. Define this + * if native order isn't BE. It is used in iso10646 fonts. */ type.encodingName = "ucs-2be"; @@ -359,6 +369,7 @@ TkpFontPkgInit(mainPtr) type.clientData = NULL; type.nullSize = 2; Tcl_CreateEncoding(&type); +#endif Tcl_CreateThreadExitHandler(FontPkgCleanup, NULL); } } @@ -456,12 +467,14 @@ ControlUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, return result; } +#ifndef WORDS_BIGENDIAN /* *------------------------------------------------------------------------- * * Ucs2beToUtfProc -- * * Convert from UCS-2BE (big-endian 16-bit Unicode) to UTF-8. + * This is only defined on LE machines. * * Results: * Returns TCL_OK if conversion was successful. @@ -500,40 +513,38 @@ Ucs2beToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, * correspond to the bytes stored in the * output buffer. */ { - CONST Tcl_UniChar *wSrc, *wSrcStart, *wSrcEnd; + CONST char *srcStart, *srcEnd; char *dstEnd, *dstStart; int result, numChars; - + result = TCL_OK; - if ((srcLen % sizeof(Tcl_UniChar)) != 0) { + + /* check alignment with ucs-2 (2 == sizeof(UCS-2)) */ + if ((srcLen % 2) != 0) { result = TCL_CONVERT_MULTIBYTE; - srcLen /= sizeof(Tcl_UniChar); - srcLen *= sizeof(Tcl_UniChar); + srcLen--; } - wSrc = (Tcl_UniChar *) src; - - wSrcStart = (Tcl_UniChar *) src; - wSrcEnd = (Tcl_UniChar *) (src + srcLen); + srcStart = src; + srcEnd = src + srcLen; dstStart = dst; dstEnd = dst + dstLen - TCL_UTF_MAX; - for (numChars = 0; wSrc < wSrcEnd; numChars++) { + for (numChars = 0; src < srcEnd; numChars++) { if (dst > dstEnd) { result = TCL_CONVERT_NOSPACE; break; } - /* - * On a little-endian machine (Intel) the UCS-2BE is in the - * wrong byte-order in comparison to "unicode", which is - * in native host order. + /* + * Need to swap byte-order on little-endian machines (x86) for + * UCS-2BE. We know this is an LE->BE swap. */ - dst += Tcl_UniCharToUtf(htons(*wSrc), dst); - wSrc++; + dst += Tcl_UniCharToUtf(htons(*((short *)src)), dst); + src += 2 /* sizeof(UCS-2) */; } - *srcReadPtr = (char *) wSrc - (char *) wSrcStart; + *srcReadPtr = src - srcStart; *dstWrotePtr = dst - dstStart; *dstCharsPtr = numChars; return result; @@ -544,7 +555,7 @@ Ucs2beToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, * * UtfToUcs2beProc -- * - * Convert from UTF-8 to UCS-2BE. + * Convert from UTF-8 to UCS-2BE (fixed 2-byte encoding). * * Results: * Returns TCL_OK if conversion was successful. @@ -583,10 +594,10 @@ UtfToUcs2beProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, * correspond to the bytes stored in the * output buffer. */ { - CONST char *srcStart, *srcEnd, *srcClose; - Tcl_UniChar *wDst, *wDstStart, *wDstEnd; + CONST char *srcStart, *srcEnd, *srcClose, *dstStart, *dstEnd; int result, numChars; - + Tcl_UniChar ch; + srcStart = src; srcEnd = src + srcLen; srcClose = srcEnd; @@ -594,9 +605,8 @@ UtfToUcs2beProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, srcClose -= TCL_UTF_MAX; } - wDst = (Tcl_UniChar *) dst; - wDstStart = (Tcl_UniChar *) dst; - wDstEnd = (Tcl_UniChar *) (dst + dstLen - sizeof(Tcl_UniChar)); + dstStart = dst; + dstEnd = dst + dstLen - 2 /* sizeof(UCS-2) */; result = TCL_OK; for (numChars = 0; src < srcEnd; numChars++) { @@ -609,22 +619,26 @@ UtfToUcs2beProc(clientData, src, srcLen, flags, statePtr, dst, dstLen, result = TCL_CONVERT_MULTIBYTE; break; } - if (wDst > wDstEnd) { + if (dst > dstEnd) { result = TCL_CONVERT_NOSPACE; break; } - src += Tcl_UtfToUniChar(src, wDst); + src += Tcl_UtfToUniChar(src, &ch); /* - * Byte swap for little-endian machines. + * Ensure big-endianness (store big bits first). + * XXX: This hard-codes the assumed size of Tcl_UniChar as 2. + * Make sure to work in char* for Tcl_UtfToUniChar alignment. + * [Bug 1122671] */ - *wDst = htons(*wDst); - wDst++; + *dst++ = (ch >> 8); + *dst++ = (ch & 0xFF); } *srcReadPtr = src - srcStart; - *dstWrotePtr = (char *) wDst - (char *) wDstStart; + *dstWrotePtr = dst - dstStart; *dstCharsPtr = numChars; return result; } +#endif /* WORDS_BIGENDIAN */ /* *--------------------------------------------------------------------------- -- cgit v0.12