diff options
| author | jan.nijtmans <nijtmans@users.sourceforge.net> | 2019-11-14 14:48:44 (GMT) |
|---|---|---|
| committer | jan.nijtmans <nijtmans@users.sourceforge.net> | 2019-11-14 14:48:44 (GMT) |
| commit | 0cb2dc120b35cf286c565c683a271710aad03f5f (patch) | |
| tree | 1477b8432a5acbadc489ba1d6bd1ed3265bc7db7 /generic/tclEncoding.c | |
| parent | 8126172c86ecff9bb2ec136731e70ea805aa552b (diff) | |
| parent | 37076881576fb0897e3c1c257ca37cd87685da0c (diff) | |
| download | tcl-0cb2dc120b35cf286c565c683a271710aad03f5f.zip tcl-0cb2dc120b35cf286c565c683a271710aad03f5f.tar.gz tcl-0cb2dc120b35cf286c565c683a271710aad03f5f.tar.bz2 | |
Merge 8.6
Diffstat (limited to 'generic/tclEncoding.c')
| -rw-r--r-- | generic/tclEncoding.c | 37 |
1 files changed, 25 insertions, 12 deletions
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 83f4787..48e198b 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -2202,7 +2202,7 @@ BinaryProc( /* *------------------------------------------------------------------------- * - * UtfExtToUtfIntProc -- + * UtfIntToUtfExtProc -- * * Convert from UTF-8 to UTF-8. While converting null-bytes from the * Tcl's internal representation (0xc0, 0x80) to the official @@ -2343,7 +2343,7 @@ UtfToUtfProc( * output buffer. */ int pureNullMode) /* Convert embedded nulls from internal * representation to real null-bytes or vice - * versa. */ + * versa. Also combine or separate surrogate pairs */ { const char *srcStart, *srcEnd, *srcClose; const char *dstStart, *dstEnd; @@ -2359,14 +2359,14 @@ UtfToUtfProc( srcEnd = src + srcLen; srcClose = srcEnd; if ((flags & TCL_ENCODING_END) == 0) { - srcClose -= TCL_UTF_MAX; + srcClose -= 6; } if (flags & TCL_ENCODING_CHAR_LIMIT) { charLimit = *dstCharsPtr; } dstStart = dst; - dstEnd = dst + dstLen - TCL_UTF_MAX; + dstEnd = dst + dstLen - ((pureNullMode == 1) ? 4 : TCL_UTF_MAX); for (numChars = 0; src < srcEnd && numChars <= charLimit; numChars++) { if ((src > srcClose) && (!Tcl_UtfCharComplete(src, srcEnd - src))) { @@ -2408,15 +2408,28 @@ UtfToUtfProc( src += 1; dst += Tcl_UniCharToUtf(*chPtr, dst); } else { - int len = TclUtfToUniChar(src, chPtr); - src += len; - dst += Tcl_UniCharToUtf(*chPtr, dst); -#if TCL_UTF_MAX <= 4 - if ((*chPtr >= 0xD800) && (len < 3)) { - src += Tcl_UtfToUniChar(src, chPtr); - dst += Tcl_UniCharToUtf(*chPtr, dst); + src += TclUtfToUniChar(src, chPtr); + if ((*chPtr & 0xFC00) == 0xD800) { + /* A high surrogate character is detected, handle especially */ + Tcl_UniChar low = *chPtr; + size_t len = Tcl_UtfToUniChar(src, &low); + if ((low & 0xFC00) != 0xDC00) { + *dst++ = (char) (((*chPtr >> 12) | 0xE0) & 0xEF); + *dst++ = (char) (((*chPtr >> 6) | 0x80) & 0xBF); + *dst++ = (char) ((*chPtr | 0x80) & 0xBF); + continue; + } else if (pureNullMode == 1) { + int full = (((*chPtr & 0x3FF) << 10) | (low & 0x3FF)) + 0x10000; + *dst++ = (char) (((full >> 18) | 0xF0) & 0xF7); + *dst++ = (char) (((full >> 12) | 0x80) & 0xBF); + *dst++ = (char) (((full >> 6) | 0x80) & 0xBF); + *dst++ = (char) ((full | 0x80) & 0xBF); + *chPtr = 0; + src += len; + continue; + } } -#endif + dst += Tcl_UniCharToUtf(*chPtr, dst); } } |
