2 files changed, 52 insertions, 35 deletions
diff --git a/ChangeLog b/ChangeLog
index de88899..d98e78b 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+2006-10-05  Jeff Hobbs  <jeffh@ActiveState.com>
+
+	* unix/tkUnixFont.c (Ucs2beToUtfProc, UtfToUcs2beProc):
+	(TkpFontPkgInit, encodingAliases): Correct alignment issues in
+	encoding conversion.  Call ucs-2be "unicode" on big-endian
+	systems. [Bug 1122671]
+
 2006-09-27  Andreas Kupries  <andreask@activestate.com>
 
 	* unix/Makefile.in (install-binaries): Added a second guard to the
diff --git a/unix/tkUnixFont.c b/unix/tkUnixFont.c
index 9140023..9174f8e 100644
--- a/unix/tkUnixFont.c
+++ b/unix/tkUnixFont.c
@@ -9,7 +9,7 @@
  * See the file "license.terms" for information on usage and redistribution of
  * this file, and for a DISCLAIMER OF ALL WARRANTIES.
  *
- * RCS: @(#) $Id: tkUnixFont.c,v 1.28 2006/04/25 08:18:31 dkf Exp $
+ * RCS: @(#) $Id: tkUnixFont.c,v 1.29 2006/10/05 21:27:43 hobbs Exp $
  */
 
 #include "tkUnixInt.h"
@@ -188,7 +188,14 @@ static EncodingAlias encodingAliases[] = {
     {"tis620",		"tis620*"},
     {"ksc5601",		"ksc5601*"},
     {"dingbats",	"*dingbats"},
+#ifdef WORDS_BIGENDIAN
+    {"unicode",		"iso10646-1"},
+#else
+    /*
+     * ucs-2be is needed if native order isn't BE.
+     */
     {"ucs-2be",		"iso10646-1"},
+#endif
     {NULL,		NULL}
 };
 
@@ -242,6 +249,7 @@ static unsigned int	RankAttributes(FontAttributes *wantPtr,
 static void		ReleaseFont(UnixFont *fontPtr);
 static void		ReleaseSubFont(Display *display, SubFont *subFontPtr);
 static int		SeenName(CONST char *name, Tcl_DString *dsPtr);
+#ifndef WORDS_BIGENDIAN
 static int		Ucs2beToUtfProc(ClientData clientData,
 			    CONST char *src, int srcLen, int flags,
 			    Tcl_EncodingState *statePtr, char *dst, int dstLen,
@@ -252,6 +260,7 @@ static int		UtfToUcs2beProc(ClientData clientData,
 			    Tcl_EncodingState *statePtr, char *dst, int dstLen,
 			    int *srcReadPtr, int *dstWrotePtr,
 			    int *dstCharsPtr);
+#endif
 
 /*
  *-------------------------------------------------------------------------
@@ -339,9 +348,10 @@ TkpFontPkgInit(
 	    FontMapInsert(&dummy, i + 0x80);
 	}
 
+#ifndef WORDS_BIGENDIAN
 	/*
-	 * UCS-2BE is unicode in big-endian format. It is used in iso10646
-	 * fonts.
+	 * UCS-2BE is unicode (UCS-2) in big-endian format.  Define this
+	 * if native order isn't BE.  It is used in iso10646 fonts.
 	 */
 
 	type.encodingName = "ucs-2be";
@@ -351,6 +361,7 @@ TkpFontPkgInit(
 	type.clientData = NULL;
 	type.nullSize = 2;
 	Tcl_CreateEncoding(&type);
+#endif
 	Tcl_CreateThreadExitHandler(FontPkgCleanup, NULL);
     }
 }
@@ -446,12 +457,14 @@ ControlUtfProc(
     return result;
 }
 
+#ifndef WORDS_BIGENDIAN
 /*
  *-------------------------------------------------------------------------
  *
  * Ucs2beToUtfProc --
  *
  *	Convert from UCS-2BE (big-endian 16-bit Unicode) to UTF-8.
+ *	This is only defined on LE machines.
  *
  * Results:
  *	Returns TCL_OK if conversion was successful.
@@ -489,42 +502,38 @@ Ucs2beToUtfProc(
 				 * correspond to the bytes stored in the
 				 * output buffer. */
 {
-    CONST Tcl_UniChar *wSrc, *wSrcStart, *wSrcEnd;
+    CONST char *srcStart, *srcEnd;
     char *dstEnd, *dstStart;
     int result, numChars;
 
     result = TCL_OK;
-    if ((srcLen % sizeof(Tcl_UniChar)) != 0) {
+
+    /* check alignment with ucs-2 (2 == sizeof(UCS-2)) */
+    if ((srcLen % 2) != 0) {
 	result = TCL_CONVERT_MULTIBYTE;
-	srcLen /= sizeof(Tcl_UniChar);
-	srcLen *= sizeof(Tcl_UniChar);
+	srcLen--;
     }
 
-    wSrc = (Tcl_UniChar *) src;
-
-    wSrcStart = (Tcl_UniChar *) src;
-    wSrcEnd = (Tcl_UniChar *) (src + srcLen);
+    srcStart = src;
+    srcEnd = src + srcLen;
 
     dstStart = dst;
     dstEnd = dst + dstLen - TCL_UTF_MAX;
 
-    for (numChars = 0; wSrc < wSrcEnd; numChars++) {
+    for (numChars = 0; src < srcEnd; numChars++) {
 	if (dst > dstEnd) {
 	    result = TCL_CONVERT_NOSPACE;
 	    break;
 	}
-
 	/*
-	 * On a little-endian machine (Intel) the UCS-2BE is in the wrong
-	 * byte-order in comparison to "unicode", which is in native host
-	 * order.
+	 * Need to swap byte-order on little-endian machines (x86) for
+	 * UCS-2BE.  We know this is an LE->BE swap.
 	 */
-
-	dst += Tcl_UniCharToUtf(htons(*wSrc), dst);
-	wSrc++;
+	dst += Tcl_UniCharToUtf(htons(*((short *)src)), dst);
+	src += 2 /* sizeof(UCS-2) */;
     }
 
-    *srcReadPtr = (char *) wSrc - (char *) wSrcStart;
+    *srcReadPtr = src - srcStart;
     *dstWrotePtr = dst - dstStart;
     *dstCharsPtr = numChars;
     return result;
@@ -535,7 +544,7 @@ Ucs2beToUtfProc(
  *
  * UtfToUcs2beProc --
  *
- *	Convert from UTF-8 to UCS-2BE.
+ *	Convert from UTF-8 to UCS-2BE (fixed 2-byte encoding).
  *
  * Results:
  *	Returns TCL_OK if conversion was successful.
@@ -574,9 +583,9 @@ UtfToUcs2beProc(
 				 * correspond to the bytes stored in the
 				 * output buffer. */
 {
-    CONST char *srcStart, *srcEnd, *srcClose;
-    Tcl_UniChar *wDst, *wDstStart, *wDstEnd;
+    CONST char *srcStart, *srcEnd, *srcClose, *dstStart, *dstEnd;
     int result, numChars;
+    Tcl_UniChar ch;
 
     srcStart = src;
     srcEnd = src + srcLen;
@@ -585,9 +594,8 @@ UtfToUcs2beProc(
 	srcClose -= TCL_UTF_MAX;
     }
 
-    wDst = (Tcl_UniChar *) dst;
-    wDstStart = (Tcl_UniChar *) dst;
-    wDstEnd = (Tcl_UniChar *) (dst + dstLen - sizeof(Tcl_UniChar));
+    dstStart = dst;
+    dstEnd   = dst + dstLen - 2 /* sizeof(UCS-2) */;
 
     result = TCL_OK;
     for (numChars = 0; src < srcEnd; numChars++) {
@@ -600,24 +608,26 @@ UtfToUcs2beProc(
 	    result = TCL_CONVERT_MULTIBYTE;
 	    break;
 	}
-	if (wDst > wDstEnd) {
+	if (dst > dstEnd) {
 	    result = TCL_CONVERT_NOSPACE;
 	    break;
-	}
-	src += Tcl_UtfToUniChar(src, wDst);
-
+        }
+	src += Tcl_UtfToUniChar(src, &ch);
 	/*
-	 * Byte swap for little-endian machines.
+	 * Ensure big-endianness (store big bits first).
+	 * XXX: This hard-codes the assumed size of Tcl_UniChar as 2.
+	 * Make sure to work in char* for Tcl_UtfToUniChar alignment.
+	 * [Bug 1122671]
 	 */
-
-	*wDst = htons(*wDst);
-	wDst++;
+	*dst++ = (ch >> 8);
+	*dst++ = (ch & 0xFF);
     }
     *srcReadPtr = src - srcStart;
-    *dstWrotePtr = (char *) wDst - (char *) wDstStart;
+    *dstWrotePtr = dst - dstStart;
     *dstCharsPtr = numChars;
     return result;
 }
+#endif /* WORDS_BIGENDIAN */
 
 /*
  *---------------------------------------------------------------------------