summary refs log tree commit diff stats
diff options
context:
space:
mode:
author hobbs <hobbs> 2006-10-05 21:28:17 (GMT)
committer hobbs <hobbs> 2006-10-05 21:28:17 (GMT)
commit 5a986f03e84121f484f4dab23b5b362eb0eb2f54 (patch)
tree 0505e9840d658ab57ca574cc00a28e0e59174a13
parent 00c02f61ecfef30ea080d3f881e79fa0c5c5e0dc (diff)
download tk-5a986f03e84121f484f4dab23b5b362eb0eb2f54.zip
tk-5a986f03e84121f484f4dab23b5b362eb0eb2f54.tar.gz
tk-5a986f03e84121f484f4dab23b5b362eb0eb2f54.tar.bz2
* unix/tkUnixFont.c (Ucs2beToUtfProc, UtfToUcs2beProc):
(TkpFontPkgInit, encodingAliases): Correct alignment issues in encoding conversion. Call ucs-2be "unicode" on big-endian systems. [Bug 1122671]
-rw-r--r-- ChangeLog 7
-rw-r--r-- unix/tkUnixFont.c 84
2 files changed, 56 insertions, 35 deletions
diff --git a/ChangeLog b/ChangeLog
index 6f4839b..d12f4bf 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+2006-10-05 Jeff Hobbs <jeffh@ActiveState.com>
+
+ * unix/tkUnixFont.c (Ucs2beToUtfProc, UtfToUcs2beProc):
+ (TkpFontPkgInit, encodingAliases): Correct alignment issues in
+ encoding conversion. Call ucs-2be "unicode" on big-endian
+ systems. [Bug 1122671]
+
2006-09-27 Pat Thoyts <patthoyts@users.sourceforge.net>
* win/makefile.vc: Support MSVC8 and AMD64 target. Also try using
diff --git a/unix/tkUnixFont.c b/unix/tkUnixFont.c
index 66f8550..6cecc1d 100644
--- a/unix/tkUnixFont.c
+++ b/unix/tkUnixFont.c
@@ -9,7 +9,7 @@
* See the file "license.terms" for information on usage and redistribution
* of this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tkUnixFont.c,v 1.18.2.5 2006/04/25 08:29:36 dkf Exp $
+ * RCS: @(#) $Id: tkUnixFont.c,v 1.18.2.6 2006/10/05 21:28:17 hobbs Exp $
*/
#include "tkUnixInt.h"
@@ -190,7 +190,14 @@ static EncodingAlias encodingAliases[] = {
{"tis620", "tis620*"},
{"ksc5601", "ksc5601*"},
{"dingbats", "*dingbats"},
+#ifdef WORDS_BIGENDIAN
+ {"unicode", "iso10646-1"},
+#else
+ /*
+ * ucs-2be is needed if native order isn't BE.
+ */
{"ucs-2be", "iso10646-1"},
+#endif
{NULL, NULL}
};
@@ -250,6 +257,7 @@ static void ReleaseSubFont _ANSI_ARGS_((Display *display,
SubFont *subFontPtr));
static int SeenName _ANSI_ARGS_((CONST char *name,
Tcl_DString *dsPtr));
+#ifndef WORDS_BIGENDIAN
static int Ucs2beToUtfProc _ANSI_ARGS_((ClientData clientData,
CONST char *src, int srcLen, int flags,
Tcl_EncodingState *statePtr, char *dst, int dstLen,
@@ -260,6 +268,7 @@ static int UtfToUcs2beProc _ANSI_ARGS_((ClientData clientData,
Tcl_EncodingState *statePtr, char *dst, int dstLen,
int *srcReadPtr, int *dstWrotePtr,
int *dstCharsPtr));
+#endif
/*
@@ -327,7 +336,7 @@ TkpFontPkgInit(mainPtr)
Tcl_EncodingType type;
SubFont dummy;
int i;
-
+
if (tsdPtr->controlFamily.encoding == NULL) {
type.encodingName = "X11ControlChars";
type.toUtfProc = ControlUtfProc;
@@ -335,7 +344,7 @@ TkpFontPkgInit(mainPtr)
type.freeProc = NULL;
type.clientData = NULL;
type.nullSize = 0;
-
+
tsdPtr->controlFamily.refCount = 2;
tsdPtr->controlFamily.encoding = Tcl_CreateEncoding(&type);
tsdPtr->controlFamily.isTwoByteFont = 0;
@@ -347,9 +356,10 @@ TkpFontPkgInit(mainPtr)
FontMapInsert(&dummy, i + 0x80);
}
+#ifndef WORDS_BIGENDIAN
/*
- * UCS-2BE is unicode in big-endian format.
- * It is used in iso10646 fonts.
+ * UCS-2BE is unicode (UCS-2) in big-endian format. Define this
+ * if native order isn't BE. It is used in iso10646 fonts.
*/
type.encodingName = "ucs-2be";
@@ -359,6 +369,7 @@ TkpFontPkgInit(mainPtr)
type.clientData = NULL;
type.nullSize = 2;
Tcl_CreateEncoding(&type);
+#endif
Tcl_CreateThreadExitHandler(FontPkgCleanup, NULL);
}
}
@@ -456,12 +467,14 @@ ControlUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
return result;
}
+#ifndef WORDS_BIGENDIAN
/*
*-------------------------------------------------------------------------
*
* Ucs2beToUtfProc --
*
* Convert from UCS-2BE (big-endian 16-bit Unicode) to UTF-8.
+ * This is only defined on LE machines.
*
* Results:
* Returns TCL_OK if conversion was successful.
@@ -500,40 +513,38 @@ Ucs2beToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
* correspond to the bytes stored in the
* output buffer. */
{
- CONST Tcl_UniChar *wSrc, *wSrcStart, *wSrcEnd;
+ CONST char *srcStart, *srcEnd;
char *dstEnd, *dstStart;
int result, numChars;
-
+
result = TCL_OK;
- if ((srcLen % sizeof(Tcl_UniChar)) != 0) {
+
+ /* check alignment with ucs-2 (2 == sizeof(UCS-2)) */
+ if ((srcLen % 2) != 0) {
result = TCL_CONVERT_MULTIBYTE;
- srcLen /= sizeof(Tcl_UniChar);
- srcLen *= sizeof(Tcl_UniChar);
+ srcLen--;
}
- wSrc = (Tcl_UniChar *) src;
-
- wSrcStart = (Tcl_UniChar *) src;
- wSrcEnd = (Tcl_UniChar *) (src + srcLen);
+ srcStart = src;
+ srcEnd = src + srcLen;
dstStart = dst;
dstEnd = dst + dstLen - TCL_UTF_MAX;
- for (numChars = 0; wSrc < wSrcEnd; numChars++) {
+ for (numChars = 0; src < srcEnd; numChars++) {
if (dst > dstEnd) {
result = TCL_CONVERT_NOSPACE;
break;
}
- /*
- * On a little-endian machine (Intel) the UCS-2BE is in the
- * wrong byte-order in comparison to "unicode", which is
- * in native host order.
+ /*
+ * Need to swap byte-order on little-endian machines (x86) for
+ * UCS-2BE. We know this is an LE->BE swap.
*/
- dst += Tcl_UniCharToUtf(htons(*wSrc), dst);
- wSrc++;
+ dst += Tcl_UniCharToUtf(htons(*((short *)src)), dst);
+ src += 2 /* sizeof(UCS-2) */;
}
- *srcReadPtr = (char *) wSrc - (char *) wSrcStart;
+ *srcReadPtr = src - srcStart;
*dstWrotePtr = dst - dstStart;
*dstCharsPtr = numChars;
return result;
@@ -544,7 +555,7 @@ Ucs2beToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
*
* UtfToUcs2beProc --
*
- * Convert from UTF-8 to UCS-2BE.
+ * Convert from UTF-8 to UCS-2BE (fixed 2-byte encoding).
*
* Results:
* Returns TCL_OK if conversion was successful.
@@ -583,10 +594,10 @@ UtfToUcs2beProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
* correspond to the bytes stored in the
* output buffer. */
{
- CONST char *srcStart, *srcEnd, *srcClose;
- Tcl_UniChar *wDst, *wDstStart, *wDstEnd;
+ CONST char *srcStart, *srcEnd, *srcClose, *dstStart, *dstEnd;
int result, numChars;
-
+ Tcl_UniChar ch;
+
srcStart = src;
srcEnd = src + srcLen;
srcClose = srcEnd;
@@ -594,9 +605,8 @@ UtfToUcs2beProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
srcClose -= TCL_UTF_MAX;
}
- wDst = (Tcl_UniChar *) dst;
- wDstStart = (Tcl_UniChar *) dst;
- wDstEnd = (Tcl_UniChar *) (dst + dstLen - sizeof(Tcl_UniChar));
+ dstStart = dst;
+ dstEnd = dst + dstLen - 2 /* sizeof(UCS-2) */;
result = TCL_OK;
for (numChars = 0; src < srcEnd; numChars++) {
@@ -609,22 +619,26 @@ UtfToUcs2beProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
result = TCL_CONVERT_MULTIBYTE;
break;
}
- if (wDst > wDstEnd) {
+ if (dst > dstEnd) {
result = TCL_CONVERT_NOSPACE;
break;
}
- src += Tcl_UtfToUniChar(src, wDst);
+ src += Tcl_UtfToUniChar(src, &ch);
/*
- * Byte swap for little-endian machines.
+ * Ensure big-endianness (store big bits first).
+ * XXX: This hard-codes the assumed size of Tcl_UniChar as 2.
+ * Make sure to work in char* for Tcl_UtfToUniChar alignment.
+ * [Bug 1122671]
*/
- *wDst = htons(*wDst);
- wDst++;
+ *dst++ = (ch >> 8);
+ *dst++ = (ch & 0xFF);
}
*srcReadPtr = src - srcStart;
- *dstWrotePtr = (char *) wDst - (char *) wDstStart;
+ *dstWrotePtr = dst - dstStart;
*dstCharsPtr = numChars;
return result;
}
+#endif /* WORDS_BIGENDIAN */
/*
*---------------------------------------------------------------------------