summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- ChangeLog 7
-rw-r--r-- generic/tcl.h 8
-rw-r--r-- generic/tclEncoding.c 60
-rw-r--r-- tests/encoding.test 7
4 files changed, 51 insertions, 31 deletions
diff --git a/ChangeLog b/ChangeLog
index 1ba0c01..5eec923 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+2006-10-05 Jeff Hobbs <jeffh@ActiveState.com>
+
+ * generic/tcl.h: note limitation on changing Tcl_UniChar size
+ * generic/tclEncoding.c (UtfToUnicodeProc, UnicodeToUtfProc):
+ * tests/encoding.test (encoding-16.1): fix alignment issues in
+ unicode <> utf conversion procs. [Bug 1122671]
+
2006-10-05 Miguel Sofer <msofer@users.sf.net>
* generic/tclVar.c (Tcl_LappendObjCmd):
diff --git a/generic/tcl.h b/generic/tcl.h
index cc79407..bc89666 100644
--- a/generic/tcl.h
+++ b/generic/tcl.h
@@ -13,7 +13,7 @@
* See the file "license.terms" for information on usage and redistribution of
* this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tcl.h,v 1.216 2006/09/26 14:08:36 dgp Exp $
+ * RCS: @(#) $Id: tcl.h,v 1.217 2006/10/05 21:24:39 hobbs Exp $
*/
#ifndef _TCL
@@ -2302,7 +2302,11 @@ typedef struct Tcl_Parse {
/*
* unsigned int isn't 100% accurate as it should be a strict 4-byte value
* (perhaps wchar_t). 64-bit systems may have troubles. The size of this
- * value must be reflected correctly in regcustom.h.
+ * value must be reflected correctly in regcustom.h and
+ * in tclEncoding.c.
+ * XXX: Tcl is currently UCS-2 and planning UTF-16 for the Unicode
+ * XXX: string rep that Tcl_UniChar represents. Changing the size
+ * XXX: of Tcl_UniChar is /not/ supported.
*/
typedef unsigned int Tcl_UniChar;
#else
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c
index a766013..65d6f9d 100644
--- a/generic/tclEncoding.c
+++ b/generic/tclEncoding.c
@@ -8,7 +8,7 @@
* See the file "license.terms" for information on usage and redistribution of
* this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclEncoding.c,v 1.44 2006/09/26 23:01:11 kennykb Exp $
+ * RCS: @(#) $Id: tclEncoding.c,v 1.45 2006/10/05 21:24:40 hobbs Exp $
*/
#include "tclInt.h"
@@ -2288,9 +2288,10 @@ UnicodeToUtfProc(
* correspond to the bytes stored in the
* output buffer. */
{
- CONST Tcl_UniChar *wSrc, *wSrcStart, *wSrcEnd;
+ CONST char *srcStart, *srcEnd;
char *dstEnd, *dstStart;
int result, numChars;
+ Tcl_UniChar ch;
result = TCL_OK;
if ((srcLen % sizeof(Tcl_UniChar)) != 0) {
@@ -2299,33 +2300,31 @@ UnicodeToUtfProc(
srcLen *= sizeof(Tcl_UniChar);
}
- wSrc = (Tcl_UniChar *) src;
-
- wSrcStart = (Tcl_UniChar *) src;
- wSrcEnd = (Tcl_UniChar *) (src + srcLen);
+ srcStart = src;
+ srcEnd = src + srcLen;
dstStart = dst;
dstEnd = dst + dstLen - TCL_UTF_MAX;
- for (numChars = 0; wSrc < wSrcEnd; numChars++) {
+ for (numChars = 0; src < srcEnd; numChars++) {
if (dst > dstEnd) {
result = TCL_CONVERT_NOSPACE;
break;
}
-
/*
- * Special case for 1-byte utf chars for speed.
+ * Special case for 1-byte utf chars for speed. Make sure we
+ * work with Tcl_UniChar-size data.
*/
-
- if (*wSrc && *wSrc < 0x80) {
- *dst++ = (char) *wSrc;
+ ch = *(Tcl_UniChar *)src;
+ if (ch && ch < 0x80) {
+ *dst++ = *src;
} else {
- dst += Tcl_UniCharToUtf(*wSrc, dst);
+ dst += Tcl_UniCharToUtf(ch, dst);
}
- wSrc++;
+ src += sizeof(Tcl_UniChar);
}
- *srcReadPtr = (char *) wSrc - (char *) wSrcStart;
+ *srcReadPtr = src - srcStart;
*dstWrotePtr = dst - dstStart;
*dstCharsPtr = numChars;
return result;
@@ -2375,9 +2374,9 @@ UtfToUnicodeProc(
* correspond to the bytes stored in the
* output buffer. */
{
- CONST char *srcStart, *srcEnd, *srcClose;
- Tcl_UniChar *wDst, *wDstStart, *wDstEnd;
+ CONST char *srcStart, *srcEnd, *srcClose, *dstStart, *dstEnd;
int result, numChars;
+ Tcl_UniChar ch;
srcStart = src;
srcEnd = src + srcLen;
@@ -2386,9 +2385,8 @@ UtfToUnicodeProc(
srcClose -= TCL_UTF_MAX;
}
- wDst = (Tcl_UniChar *) dst;
- wDstStart = (Tcl_UniChar *) dst;
- wDstEnd = (Tcl_UniChar *) (dst + dstLen - sizeof(Tcl_UniChar));
+ dstStart = dst;
+ dstEnd = dst + dstLen - sizeof(Tcl_UniChar);
result = TCL_OK;
for (numChars = 0; src < srcEnd; numChars++) {
@@ -2401,16 +2399,26 @@ UtfToUnicodeProc(
result = TCL_CONVERT_MULTIBYTE;
break;
}
- if (wDst > wDstEnd) {
+ if (dst > dstEnd) {
result = TCL_CONVERT_NOSPACE;
break;
- }
- src += TclUtfToUniChar(src, wDst);
- wDst++;
+ }
+ src += TclUtfToUniChar(src, &ch);
+ /*
+ * Need to handle this in a way that won't cause misalignment
+ * by casting dst to a Tcl_UniChar. [Bug 1122671]
+ * XXX: This hard-codes the assumed size of Tcl_UniChar as 2.
+ */
+#ifdef WORDS_BIGENDIAN
+ *dst++ = (ch >> 8);
+ *dst++ = (ch & 0xFF);
+#else
+ *dst++ = (ch & 0xFF);
+ *dst++ = (ch >> 8);
+#endif
}
-
*srcReadPtr = src - srcStart;
- *dstWrotePtr = (char *) wDst - (char *) wDstStart;
+ *dstWrotePtr = dst - dstStart;
*dstCharsPtr = numChars;
return result;
}
diff --git a/tests/encoding.test b/tests/encoding.test
index 1de8880..8b7f60e 100644
--- a/tests/encoding.test
+++ b/tests/encoding.test
@@ -8,7 +8,7 @@
# See the file "license.terms" for information on usage and redistribution
# of this file, and for a DISCLAIMER OF ALL WARRANTIES.
#
-# RCS: @(#) $Id: encoding.test,v 1.24 2006/02/08 21:41:28 dgp Exp $
+# RCS: @(#) $Id: encoding.test,v 1.25 2006/10/05 21:24:40 hobbs Exp $
package require tcltest 2
namespace import -force ::tcltest::*
@@ -307,8 +307,9 @@ test encoding-15.3 {UtfToUtfProc null character input} {
} {1 2 c080}
test encoding-16.1 {UnicodeToUtfProc} {
- encoding convertfrom unicode NN
-} "\u4e4e"
+ set val [encoding convertfrom unicode NN]
+ list $val [format %x [scan $val %c]]
+} "\u4e4e 4e4e"
test encoding-17.1 {UtfToUnicodeProc} {
} {}