From d227cba5138e42a0014e8e3542847ed0b884a208 Mon Sep 17 00:00:00 2001 From: hobbs Date: Thu, 5 Oct 2006 21:27:43 +0000 Subject: * unix/tkUnixFont.c (Ucs2beToUtfProc, UtfToUcs2beProc): (TkpFontPkgInit, encodingAliases): Correct alignment issues in encoding conversion. Call ucs-2be "unicode" on big-endian systems. [Bug 1122671] --- ChangeLog | 7 +++++ unix/tkUnixFont.c | 80 +++++++++++++++++++++++++++++++------------------------ 2 files changed, 52 insertions(+), 35 deletions(-) diff --git a/ChangeLog b/ChangeLog index de88899..d98e78b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,10 @@ +2006-10-05 Jeff Hobbs + + * unix/tkUnixFont.c (Ucs2beToUtfProc, UtfToUcs2beProc): + (TkpFontPkgInit, encodingAliases): Correct alignment issues in + encoding conversion. Call ucs-2be "unicode" on big-endian + systems. [Bug 1122671] + 2006-09-27 Andreas Kupries * unix/Makefile.in (install-binaries): Added a second guard to the diff --git a/unix/tkUnixFont.c b/unix/tkUnixFont.c index 9140023..9174f8e 100644 --- a/unix/tkUnixFont.c +++ b/unix/tkUnixFont.c @@ -9,7 +9,7 @@ * See the file "license.terms" for information on usage and redistribution of * this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tkUnixFont.c,v 1.28 2006/04/25 08:18:31 dkf Exp $ + * RCS: @(#) $Id: tkUnixFont.c,v 1.29 2006/10/05 21:27:43 hobbs Exp $ */ #include "tkUnixInt.h" @@ -188,7 +188,14 @@ static EncodingAlias encodingAliases[] = { {"tis620", "tis620*"}, {"ksc5601", "ksc5601*"}, {"dingbats", "*dingbats"}, +#ifdef WORDS_BIGENDIAN + {"unicode", "iso10646-1"}, +#else + /* + * ucs-2be is needed if native order isn't BE. + */ {"ucs-2be", "iso10646-1"}, +#endif {NULL, NULL} }; @@ -242,6 +249,7 @@ static unsigned int RankAttributes(FontAttributes *wantPtr, static void ReleaseFont(UnixFont *fontPtr); static void ReleaseSubFont(Display *display, SubFont *subFontPtr); static int SeenName(CONST char *name, Tcl_DString *dsPtr); +#ifndef WORDS_BIGENDIAN static int Ucs2beToUtfProc(ClientData clientData, CONST char *src, int srcLen, int flags, Tcl_EncodingState *statePtr, char *dst, int dstLen, @@ -252,6 +260,7 @@ static int UtfToUcs2beProc(ClientData clientData, Tcl_EncodingState *statePtr, char *dst, int dstLen, int *srcReadPtr, int *dstWrotePtr, int *dstCharsPtr); +#endif /* *------------------------------------------------------------------------- @@ -339,9 +348,10 @@ TkpFontPkgInit( FontMapInsert(&dummy, i + 0x80); } +#ifndef WORDS_BIGENDIAN /* - * UCS-2BE is unicode in big-endian format. It is used in iso10646 - * fonts. + * UCS-2BE is unicode (UCS-2) in big-endian format. Define this + * if native order isn't BE. It is used in iso10646 fonts. */ type.encodingName = "ucs-2be"; @@ -351,6 +361,7 @@ TkpFontPkgInit( type.clientData = NULL; type.nullSize = 2; Tcl_CreateEncoding(&type); +#endif Tcl_CreateThreadExitHandler(FontPkgCleanup, NULL); } } @@ -446,12 +457,14 @@ ControlUtfProc( return result; } +#ifndef WORDS_BIGENDIAN /* *------------------------------------------------------------------------- * * Ucs2beToUtfProc -- * * Convert from UCS-2BE (big-endian 16-bit Unicode) to UTF-8. + * This is only defined on LE machines. * * Results: * Returns TCL_OK if conversion was successful. @@ -489,42 +502,38 @@ Ucs2beToUtfProc( * correspond to the bytes stored in the * output buffer. */ { - CONST Tcl_UniChar *wSrc, *wSrcStart, *wSrcEnd; + CONST char *srcStart, *srcEnd; char *dstEnd, *dstStart; int result, numChars; result = TCL_OK; - if ((srcLen % sizeof(Tcl_UniChar)) != 0) { + + /* check alignment with ucs-2 (2 == sizeof(UCS-2)) */ + if ((srcLen % 2) != 0) { result = TCL_CONVERT_MULTIBYTE; - srcLen /= sizeof(Tcl_UniChar); - srcLen *= sizeof(Tcl_UniChar); + srcLen--; } - wSrc = (Tcl_UniChar *) src; - - wSrcStart = (Tcl_UniChar *) src; - wSrcEnd = (Tcl_UniChar *) (src + srcLen); + srcStart = src; + srcEnd = src + srcLen; dstStart = dst; dstEnd = dst + dstLen - TCL_UTF_MAX; - for (numChars = 0; wSrc < wSrcEnd; numChars++) { + for (numChars = 0; src < srcEnd; numChars++) { if (dst > dstEnd) { result = TCL_CONVERT_NOSPACE; break; } - /* - * On a little-endian machine (Intel) the UCS-2BE is in the wrong - * byte-order in comparison to "unicode", which is in native host - * order. + * Need to swap byte-order on little-endian machines (x86) for + * UCS-2BE. We know this is an LE->BE swap. */ - - dst += Tcl_UniCharToUtf(htons(*wSrc), dst); - wSrc++; + dst += Tcl_UniCharToUtf(htons(*((short *)src)), dst); + src += 2 /* sizeof(UCS-2) */; } - *srcReadPtr = (char *) wSrc - (char *) wSrcStart; + *srcReadPtr = src - srcStart; *dstWrotePtr = dst - dstStart; *dstCharsPtr = numChars; return result; @@ -535,7 +544,7 @@ Ucs2beToUtfProc( * * UtfToUcs2beProc -- * - * Convert from UTF-8 to UCS-2BE. + * Convert from UTF-8 to UCS-2BE (fixed 2-byte encoding). * * Results: * Returns TCL_OK if conversion was successful. @@ -574,9 +583,9 @@ UtfToUcs2beProc( * correspond to the bytes stored in the * output buffer. */ { - CONST char *srcStart, *srcEnd, *srcClose; - Tcl_UniChar *wDst, *wDstStart, *wDstEnd; + CONST char *srcStart, *srcEnd, *srcClose, *dstStart, *dstEnd; int result, numChars; + Tcl_UniChar ch; srcStart = src; srcEnd = src + srcLen; @@ -585,9 +594,8 @@ UtfToUcs2beProc( srcClose -= TCL_UTF_MAX; } - wDst = (Tcl_UniChar *) dst; - wDstStart = (Tcl_UniChar *) dst; - wDstEnd = (Tcl_UniChar *) (dst + dstLen - sizeof(Tcl_UniChar)); + dstStart = dst; + dstEnd = dst + dstLen - 2 /* sizeof(UCS-2) */; result = TCL_OK; for (numChars = 0; src < srcEnd; numChars++) { @@ -600,24 +608,26 @@ UtfToUcs2beProc( result = TCL_CONVERT_MULTIBYTE; break; } - if (wDst > wDstEnd) { + if (dst > dstEnd) { result = TCL_CONVERT_NOSPACE; break; - } - src += Tcl_UtfToUniChar(src, wDst); - + } + src += Tcl_UtfToUniChar(src, &ch); /* - * Byte swap for little-endian machines. + * Ensure big-endianness (store big bits first). + * XXX: This hard-codes the assumed size of Tcl_UniChar as 2. + * Make sure to work in char* for Tcl_UtfToUniChar alignment. + * [Bug 1122671] */ - - *wDst = htons(*wDst); - wDst++; + *dst++ = (ch >> 8); + *dst++ = (ch & 0xFF); } *srcReadPtr = src - srcStart; - *dstWrotePtr = (char *) wDst - (char *) wDstStart; + *dstWrotePtr = dst - dstStart; *dstCharsPtr = numChars; return result; } +#endif /* WORDS_BIGENDIAN */ /* *--------------------------------------------------------------------------- -- cgit v0.12