diff options
-rw-r--r-- | ChangeLog | 7 | ||||
-rw-r--r-- | generic/tcl.h | 8 | ||||
-rw-r--r-- | generic/tclEncoding.c | 60 | ||||
-rw-r--r-- | tests/encoding.test | 7 |
4 files changed, 51 insertions, 31 deletions
@@ -1,3 +1,10 @@ +2006-10-05 Jeff Hobbs <jeffh@ActiveState.com> + + * generic/tcl.h: note limitation on changing Tcl_UniChar size + * generic/tclEncoding.c (UtfToUnicodeProc, UnicodeToUtfProc): + * tests/encoding.test (encoding-16.1): fix alignment issues in + unicode <> utf conversion procs. [Bug 1122671] + 2006-10-05 Miguel Sofer <msofer@users.sf.net> * generic/tclVar.c (Tcl_LappendObjCmd): diff --git a/generic/tcl.h b/generic/tcl.h index cc79407..bc89666 100644 --- a/generic/tcl.h +++ b/generic/tcl.h @@ -13,7 +13,7 @@ * See the file "license.terms" for information on usage and redistribution of * this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tcl.h,v 1.216 2006/09/26 14:08:36 dgp Exp $ + * RCS: @(#) $Id: tcl.h,v 1.217 2006/10/05 21:24:39 hobbs Exp $ */ #ifndef _TCL @@ -2302,7 +2302,11 @@ typedef struct Tcl_Parse { /* * unsigned int isn't 100% accurate as it should be a strict 4-byte value * (perhaps wchar_t). 64-bit systems may have troubles. The size of this - * value must be reflected correctly in regcustom.h. + * value must be reflected correctly in regcustom.h and + * in tclEncoding.c. + * XXX: Tcl is currently UCS-2 and planning UTF-16 for the Unicode + * XXX: string rep that Tcl_UniChar represents. Changing the size + * XXX: of Tcl_UniChar is /not/ supported. */ typedef unsigned int Tcl_UniChar; #else diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index a766013..65d6f9d 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -8,7 +8,7 @@ * See the file "license.terms" for information on usage and redistribution of * this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclEncoding.c,v 1.44 2006/09/26 23:01:11 kennykb Exp $ + * RCS: @(#) $Id: tclEncoding.c,v 1.45 2006/10/05 21:24:40 hobbs Exp $ */ #include "tclInt.h" @@ -2288,9 +2288,10 @@ UnicodeToUtfProc( * correspond to the bytes stored in the * output buffer. */ { - CONST Tcl_UniChar *wSrc, *wSrcStart, *wSrcEnd; + CONST char *srcStart, *srcEnd; char *dstEnd, *dstStart; int result, numChars; + Tcl_UniChar ch; result = TCL_OK; if ((srcLen % sizeof(Tcl_UniChar)) != 0) { @@ -2299,33 +2300,31 @@ UnicodeToUtfProc( srcLen *= sizeof(Tcl_UniChar); } - wSrc = (Tcl_UniChar *) src; - - wSrcStart = (Tcl_UniChar *) src; - wSrcEnd = (Tcl_UniChar *) (src + srcLen); + srcStart = src; + srcEnd = src + srcLen; dstStart = dst; dstEnd = dst + dstLen - TCL_UTF_MAX; - for (numChars = 0; wSrc < wSrcEnd; numChars++) { + for (numChars = 0; src < srcEnd; numChars++) { if (dst > dstEnd) { result = TCL_CONVERT_NOSPACE; break; } - /* - * Special case for 1-byte utf chars for speed. + * Special case for 1-byte utf chars for speed. Make sure we + * work with Tcl_UniChar-size data. */ - - if (*wSrc && *wSrc < 0x80) { - *dst++ = (char) *wSrc; + ch = *(Tcl_UniChar *)src; + if (ch && ch < 0x80) { + *dst++ = *src; } else { - dst += Tcl_UniCharToUtf(*wSrc, dst); + dst += Tcl_UniCharToUtf(ch, dst); } - wSrc++; + src += sizeof(Tcl_UniChar); } - *srcReadPtr = (char *) wSrc - (char *) wSrcStart; + *srcReadPtr = src - srcStart; *dstWrotePtr = dst - dstStart; *dstCharsPtr = numChars; return result; @@ -2375,9 +2374,9 @@ UtfToUnicodeProc( * correspond to the bytes stored in the * output buffer. */ { - CONST char *srcStart, *srcEnd, *srcClose; - Tcl_UniChar *wDst, *wDstStart, *wDstEnd; + CONST char *srcStart, *srcEnd, *srcClose, *dstStart, *dstEnd; int result, numChars; + Tcl_UniChar ch; srcStart = src; srcEnd = src + srcLen; @@ -2386,9 +2385,8 @@ UtfToUnicodeProc( srcClose -= TCL_UTF_MAX; } - wDst = (Tcl_UniChar *) dst; - wDstStart = (Tcl_UniChar *) dst; - wDstEnd = (Tcl_UniChar *) (dst + dstLen - sizeof(Tcl_UniChar)); + dstStart = dst; + dstEnd = dst + dstLen - sizeof(Tcl_UniChar); result = TCL_OK; for (numChars = 0; src < srcEnd; numChars++) { @@ -2401,16 +2399,26 @@ UtfToUnicodeProc( result = TCL_CONVERT_MULTIBYTE; break; } - if (wDst > wDstEnd) { + if (dst > dstEnd) { result = TCL_CONVERT_NOSPACE; break; - } - src += TclUtfToUniChar(src, wDst); - wDst++; + } + src += TclUtfToUniChar(src, &ch); + /* + * Need to handle this in a way that won't cause misalignment + * by casting dst to a Tcl_UniChar. [Bug 1122671] + * XXX: This hard-codes the assumed size of Tcl_UniChar as 2. + */ +#ifdef WORDS_BIGENDIAN + *dst++ = (ch >> 8); + *dst++ = (ch & 0xFF); +#else + *dst++ = (ch & 0xFF); + *dst++ = (ch >> 8); +#endif } - *srcReadPtr = src - srcStart; - *dstWrotePtr = (char *) wDst - (char *) wDstStart; + *dstWrotePtr = dst - dstStart; *dstCharsPtr = numChars; return result; } diff --git a/tests/encoding.test b/tests/encoding.test index 1de8880..8b7f60e 100644 --- a/tests/encoding.test +++ b/tests/encoding.test @@ -8,7 +8,7 @@ # See the file "license.terms" for information on usage and redistribution # of this file, and for a DISCLAIMER OF ALL WARRANTIES. # -# RCS: @(#) $Id: encoding.test,v 1.24 2006/02/08 21:41:28 dgp Exp $ +# RCS: @(#) $Id: encoding.test,v 1.25 2006/10/05 21:24:40 hobbs Exp $ package require tcltest 2 namespace import -force ::tcltest::* @@ -307,8 +307,9 @@ test encoding-15.3 {UtfToUtfProc null character input} { } {1 2 c080} test encoding-16.1 {UnicodeToUtfProc} { - encoding convertfrom unicode NN -} "\u4e4e" + set val [encoding convertfrom unicode NN] + list $val [format %x [scan $val %c]] +} "\u4e4e 4e4e" test encoding-17.1 {UtfToUnicodeProc} { } {} |