From 5a986f03e84121f484f4dab23b5b362eb0eb2f54 Mon Sep 17 00:00:00 2001
From: hobbs <hobbs>
Date: Thu, 5 Oct 2006 21:28:17 +0000
Subject: * unix/tkUnixFont.c (Ucs2beToUtfProc, UtfToUcs2beProc):
 (TkpFontPkgInit, encodingAliases): Correct alignment issues in
 encoding conversion.  Call ucs-2be "unicode" on big-endian systems. [Bug
 1122671]

---
 ChangeLog         |  7 +++++
 unix/tkUnixFont.c | 84 ++++++++++++++++++++++++++++++++-----------------------
 2 files changed, 56 insertions(+), 35 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index 6f4839b..d12f4bf 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+2006-10-05  Jeff Hobbs  <jeffh@ActiveState.com>
+
+	* unix/tkUnixFont.c (Ucs2beToUtfProc, UtfToUcs2beProc):
+	(TkpFontPkgInit, encodingAliases): Correct alignment issues in
+	encoding conversion.  Call ucs-2be "unicode" on big-endian
+	systems. [Bug 1122671]
+
 2006-09-27  Pat Thoyts  <patthoyts@users.sourceforge.net>
 
 	* win/makefile.vc:  Support MSVC8 and AMD64 target. Also try using
diff --git a/unix/tkUnixFont.c b/unix/tkUnixFont.c
index 66f8550..6cecc1d 100644
--- a/unix/tkUnixFont.c
+++ b/unix/tkUnixFont.c
@@ -9,7 +9,7 @@
  * See the file "license.terms" for information on usage and redistribution
  * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
  *
- * RCS: @(#) $Id: tkUnixFont.c,v 1.18.2.5 2006/04/25 08:29:36 dkf Exp $
+ * RCS: @(#) $Id: tkUnixFont.c,v 1.18.2.6 2006/10/05 21:28:17 hobbs Exp $
  */
  
 #include "tkUnixInt.h"
@@ -190,7 +190,14 @@ static EncodingAlias encodingAliases[] = {
     {"tis620",		"tis620*"},
     {"ksc5601",		"ksc5601*"},
     {"dingbats",	"*dingbats"},
+#ifdef WORDS_BIGENDIAN
+    {"unicode",		"iso10646-1"},
+#else
+    /*
+     * ucs-2be is needed if native order isn't BE.
+     */
     {"ucs-2be",		"iso10646-1"},
+#endif
     {NULL,		NULL}
 };
 
@@ -250,6 +257,7 @@ static void		ReleaseSubFont _ANSI_ARGS_((Display *display,
 			    SubFont *subFontPtr));
 static int		SeenName _ANSI_ARGS_((CONST char *name,
 			    Tcl_DString *dsPtr));
+#ifndef WORDS_BIGENDIAN
 static int		Ucs2beToUtfProc _ANSI_ARGS_((ClientData clientData,
 			    CONST char *src, int srcLen, int flags,
 			    Tcl_EncodingState *statePtr, char *dst, int dstLen,
@@ -260,6 +268,7 @@ static int		UtfToUcs2beProc _ANSI_ARGS_((ClientData clientData,
 			    Tcl_EncodingState *statePtr, char *dst, int dstLen,
 			    int *srcReadPtr, int *dstWrotePtr,
 			    int *dstCharsPtr));
+#endif
 
 
 /*
@@ -327,7 +336,7 @@ TkpFontPkgInit(mainPtr)
     Tcl_EncodingType type;
     SubFont dummy;
     int i;
-    
+
     if (tsdPtr->controlFamily.encoding == NULL) {
 	type.encodingName	= "X11ControlChars";
 	type.toUtfProc		= ControlUtfProc;
@@ -335,7 +344,7 @@ TkpFontPkgInit(mainPtr)
 	type.freeProc		= NULL;
 	type.clientData		= NULL;
 	type.nullSize		= 0;
-	
+
 	tsdPtr->controlFamily.refCount = 2;
 	tsdPtr->controlFamily.encoding = Tcl_CreateEncoding(&type);
 	tsdPtr->controlFamily.isTwoByteFont = 0;
@@ -347,9 +356,10 @@ TkpFontPkgInit(mainPtr)
 	    FontMapInsert(&dummy, i + 0x80);
 	}
 
+#ifndef WORDS_BIGENDIAN
 	/*
-	 * UCS-2BE is unicode in big-endian format.
-	 * It is used in iso10646 fonts.
+	 * UCS-2BE is unicode (UCS-2) in big-endian format.  Define this
+	 * if native order isn't BE.  It is used in iso10646 fonts.
 	 */
 
 	type.encodingName	= "ucs-2be";
@@ -359,6 +369,7 @@ TkpFontPkgInit(mainPtr)
 	type.clientData		= NULL;
 	type.nullSize		= 2;
 	Tcl_CreateEncoding(&type);
+#endif
 	Tcl_CreateThreadExitHandler(FontPkgCleanup, NULL);
     }
 }
@@ -456,12 +467,14 @@ ControlUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
     return result;
 }
 
+#ifndef WORDS_BIGENDIAN
 /*
  *-------------------------------------------------------------------------
  *
  * Ucs2beToUtfProc --
  *
  *	Convert from UCS-2BE (big-endian 16-bit Unicode) to UTF-8.
+ *	This is only defined on LE machines.
  *
  * Results:
  *	Returns TCL_OK if conversion was successful.
@@ -500,40 +513,38 @@ Ucs2beToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
 				 * correspond to the bytes stored in the
 				 * output buffer. */
 {
-    CONST Tcl_UniChar *wSrc, *wSrcStart, *wSrcEnd;
+    CONST char *srcStart, *srcEnd;
     char *dstEnd, *dstStart;
     int result, numChars;
-    
+
     result = TCL_OK;
-    if ((srcLen % sizeof(Tcl_UniChar)) != 0) {
+
+    /* Check that srcLen is a whole number of UCS-2 units (2 bytes each). */
+    if ((srcLen % 2) != 0) {
 	result = TCL_CONVERT_MULTIBYTE;
-	srcLen /= sizeof(Tcl_UniChar);
-	srcLen *= sizeof(Tcl_UniChar);
+	srcLen--;
     }
 
-    wSrc = (Tcl_UniChar *) src;
-
-    wSrcStart = (Tcl_UniChar *) src;
-    wSrcEnd = (Tcl_UniChar *) (src + srcLen);
+    srcStart = src;
+    srcEnd = src + srcLen;
 
     dstStart = dst;
     dstEnd = dst + dstLen - TCL_UTF_MAX;
 
-    for (numChars = 0; wSrc < wSrcEnd; numChars++) {
+    for (numChars = 0; src < srcEnd; numChars++) {
 	if (dst > dstEnd) {
 	    result = TCL_CONVERT_NOSPACE;
 	    break;
 	}
-	/* 
-	 * On a little-endian machine (Intel) the UCS-2BE is in the
-	 * wrong byte-order in comparison to "unicode", which is
-	 * in native host order.
+	/*
+	 * Need to swap byte-order on little-endian machines (x86) for
+	 * UCS-2BE.  We know this is an LE->BE swap.
 	 */
-	dst += Tcl_UniCharToUtf(htons(*wSrc), dst);
-	wSrc++;
+	dst += Tcl_UniCharToUtf(htons(*((short *)src)), dst);
+	src += 2 /* sizeof(UCS-2) */;
     }
 
-    *srcReadPtr = (char *) wSrc - (char *) wSrcStart;
+    *srcReadPtr = src - srcStart;
     *dstWrotePtr = dst - dstStart;
     *dstCharsPtr = numChars;
     return result;
@@ -544,7 +555,7 @@ Ucs2beToUtfProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
  *
  * UtfToUcs2beProc --
  *
- *	Convert from UTF-8 to UCS-2BE.
+ *	Convert from UTF-8 to UCS-2BE (fixed 2-byte encoding).
  *
  * Results:
  *	Returns TCL_OK if conversion was successful.
@@ -583,10 +594,10 @@ UtfToUcs2beProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
 				 * correspond to the bytes stored in the
 				 * output buffer. */
 {
-    CONST char *srcStart, *srcEnd, *srcClose;
-    Tcl_UniChar *wDst, *wDstStart, *wDstEnd;
+    CONST char *srcStart, *srcEnd, *srcClose, *dstStart, *dstEnd;
     int result, numChars;
-    
+    Tcl_UniChar ch;
+
     srcStart = src;
     srcEnd = src + srcLen;
     srcClose = srcEnd;
@@ -594,9 +605,8 @@ UtfToUcs2beProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
 	srcClose -= TCL_UTF_MAX;
     }
 
-    wDst = (Tcl_UniChar *) dst;
-    wDstStart = (Tcl_UniChar *) dst;
-    wDstEnd = (Tcl_UniChar *) (dst + dstLen - sizeof(Tcl_UniChar));
+    dstStart = dst;
+    dstEnd   = dst + dstLen - 2 /* sizeof(UCS-2) */;
 
     result = TCL_OK;
     for (numChars = 0; src < srcEnd; numChars++) {
@@ -609,22 +619,26 @@ UtfToUcs2beProc(clientData, src, srcLen, flags, statePtr, dst, dstLen,
 	    result = TCL_CONVERT_MULTIBYTE;
 	    break;
 	}
-	if (wDst > wDstEnd) {
+	if (dst > dstEnd) {
 	    result = TCL_CONVERT_NOSPACE;
 	    break;
         }
-	src += Tcl_UtfToUniChar(src, wDst);
+	src += Tcl_UtfToUniChar(src, &ch);
 	/*
-	 * Byte swap for little-endian machines.
+	 * Store the most significant byte first (big-endian).
+	 * XXX: This hard-codes the assumed size of Tcl_UniChar as 2.
+	 * Write byte-wise through char* so dst needs no 2-byte alignment.
+	 * [Bug 1122671]
 	 */
-	*wDst = htons(*wDst);
-	wDst++;
+	*dst++ = (ch >> 8);
+	*dst++ = (ch & 0xFF);
     }
     *srcReadPtr = src - srcStart;
-    *dstWrotePtr = (char *) wDst - (char *) wDstStart;
+    *dstWrotePtr = dst - dstStart;
     *dstCharsPtr = numChars;
     return result;
 }
+#endif /* WORDS_BIGENDIAN */
 
 /*
  *---------------------------------------------------------------------------
-- 
cgit v0.12