diff options
author | jan.nijtmans <nijtmans@users.sourceforge.net> | 2017-08-31 12:21:43 (GMT) |
---|---|---|
committer | jan.nijtmans <nijtmans@users.sourceforge.net> | 2017-08-31 12:21:43 (GMT) |
commit | 1bfa907494ead157fc4679050d4de1a4376664e1 (patch) | |
tree | 890afab76c839baf2b44d040003cf2b8e9f64bad | |
parent | 77770594c9c9ae90a3a91b566d775f7a04e9515d (diff) | |
parent | a772eb568d5ebc987782fef1fc6b913fe6bc4df8 (diff) | |
download | tcl-1bfa907494ead157fc4679050d4de1a4376664e1.zip tcl-1bfa907494ead157fc4679050d4de1a4376664e1.tar.gz tcl-1bfa907494ead157fc4679050d4de1a4376664e1.tar.bz2 |
(merge-mark core-8-6-branch).
Give state to UtfToUtfProc() and UnicodeToUtfProc() in which the last (intermediate) Tcl_UniChar can be remembered: This way those functions can produce 4-byte UTF-8 sequences built from surrogate pairs if Tcl is compiled with TCL_UTF_MAX=4. For TCL_UTF_MAX=3 and TCL_UTF_MAX=6 this has no effect.
-rw-r--r-- | generic/tclEncoding.c | 57 |
1 files changed, 33 insertions, 24 deletions
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 7ab33f8..d430b32 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -2293,8 +2293,11 @@ UtfToUtfProc( const char *srcStart, *srcEnd, *srcClose; const char *dstStart, *dstEnd; int result, numChars, charLimit = INT_MAX; - Tcl_UniChar ch = 0; + Tcl_UniChar *chPtr = (Tcl_UniChar *) statePtr; + if (flags & TCL_ENCODING_START) { + *statePtr = 0; + } result = TCL_OK; srcStart = src; @@ -2346,12 +2349,12 @@ UtfToUtfProc( * incomplete char its bytes are made to represent themselves. */ - ch = (unsigned char) *src; + *chPtr = (unsigned char) *src; src += 1; - dst += Tcl_UniCharToUtf(ch, dst); + dst += Tcl_UniCharToUtf(*chPtr, dst); } else { - src += TclUtfToUniChar(src, &ch); - dst += Tcl_UniCharToUtf(ch, dst); + src += TclUtfToUniChar(src, chPtr); + dst += Tcl_UniCharToUtf(*chPtr, dst); } } @@ -2407,8 +2410,11 @@ UnicodeToUtfProc( const char *srcStart, *srcEnd; const char *dstEnd, *dstStart; int result, numChars, charLimit = INT_MAX; - Tcl_UniChar ch = 0; + Tcl_UniChar *chPtr = (Tcl_UniChar *) statePtr; + if (flags & TCL_ENCODING_START) { + *statePtr = 0; + } if (flags & TCL_ENCODING_CHAR_LIMIT) { charLimit = *dstCharsPtr; } @@ -2436,11 +2442,11 @@ UnicodeToUtfProc( * Tcl_UniChar-size data. */ - ch = *(Tcl_UniChar *)src; - if (ch && ch < 0x80) { - *dst++ = (ch & 0xFF); + *chPtr = *(Tcl_UniChar *)src; + if (*chPtr && *chPtr < 0x80) { + *dst++ = (*chPtr & 0xFF); } else { - dst += Tcl_UniCharToUtf(ch, dst); + dst += Tcl_UniCharToUtf(*chPtr, dst); } src += sizeof(Tcl_UniChar); } @@ -2497,8 +2503,11 @@ UtfToUnicodeProc( { const char *srcStart, *srcEnd, *srcClose, *dstStart, *dstEnd; int result, numChars; - Tcl_UniChar ch = 0; + Tcl_UniChar *chPtr = (Tcl_UniChar *) statePtr; + if (flags & TCL_ENCODING_START) { + *statePtr = 0; + } srcStart = src; srcEnd = src + srcLen; srcClose = srcEnd; @@ -2524,7 +2533,7 @@ UtfToUnicodeProc( result = TCL_CONVERT_NOSPACE; break; } - src += TclUtfToUniChar(src, &ch); + src += TclUtfToUniChar(src, chPtr); /* * Need to handle this in a way that won't cause misalignment by @@ -2533,23 +2542,23 @@ UtfToUnicodeProc( #ifdef WORDS_BIGENDIAN #if TCL_UTF_MAX > 4 - *dst++ = (ch >> 24); - *dst++ = ((ch >> 16) & 0xFF); - *dst++ = ((ch >> 8) & 0xFF); - *dst++ = (ch & 0xFF); + *dst++ = (*chPtr >> 24); + *dst++ = ((*chPtr >> 16) & 0xFF); + *dst++ = ((*chPtr >> 8) & 0xFF); + *dst++ = (*chPtr & 0xFF); #else - *dst++ = (ch >> 8); - *dst++ = (ch & 0xFF); + *dst++ = (*chPtr >> 8); + *dst++ = (*chPtr & 0xFF); #endif #else #if TCL_UTF_MAX > 4 - *dst++ = (ch & 0xFF); - *dst++ = ((ch >> 8) & 0xFF); - *dst++ = ((ch >> 16) & 0xFF); - *dst++ = (ch >> 24); + *dst++ = (*chPtr & 0xFF); + *dst++ = ((*chPtr >> 8) & 0xFF); + *dst++ = ((*chPtr >> 16) & 0xFF); + *dst++ = (*chPtr >> 24); #else - *dst++ = (ch & 0xFF); - *dst++ = (ch >> 8); + *dst++ = (*chPtr & 0xFF); + *dst++ = (*chPtr >> 8); #endif #endif } |