diff options
| author | dgp <dgp@users.sourceforge.net> | 2018-10-17 19:47:01 (GMT) |
|---|---|---|
| committer | dgp <dgp@users.sourceforge.net> | 2018-10-17 19:47:01 (GMT) |
| commit | 1b03667a27e4f198e48c344454663aae4c085a14 (patch) | |
| tree | 70bbdddb4f6bdec0c26dd22cf8ccd499b1c97416 /generic/tclUtf.c | |
| parent | 5c24bebd2b402df644549d6c0efe6586dde65891 (diff) | |
| parent | e154c5151281fbbe01ef1361f5f6980a5ec5a6d3 (diff) | |
| download | tcl-1b03667a27e4f198e48c344454663aae4c085a14.zip tcl-1b03667a27e4f198e48c344454663aae4c085a14.tar.gz tcl-1b03667a27e4f198e48c344454663aae4c085a14.tar.bz2 | |
merge 8.7
Diffstat (limited to 'generic/tclUtf.c')
| -rw-r--r-- | generic/tclUtf.c | 56 |
1 files changed, 42 insertions, 14 deletions
diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 693e210..ce67db7 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -173,6 +173,13 @@ Tcl_UniCharToUtf( buf[0] = (char) ((ch >> 18) | 0xF0); return 4; } + } else if (ch == -1) { + if (((buf[0] & 0xF8) == 0xF0) && ((buf[1] & 0xC0) == 0x80) + && ((buf[2] & 0xCF) == 0)) { + ch = 0xD7C0 + ((buf[0] & 0x07) << 8) + ((buf[1] & 0x3F) << 2) + + ((buf[2] & 0x30) >> 4); + goto three; + } } ch = 0xFFFD; @@ -211,22 +218,31 @@ Tcl_UniCharToUtfDString( { const Tcl_UniChar *w, *wEnd; char *p, *string; - int oldLength; + int oldLength, len = 1; /* * UTF-8 string length in bytes will be <= Unicode string length * 4. */ oldLength = Tcl_DStringLength(dsPtr); - Tcl_DStringSetLength(dsPtr, (oldLength + uniLength + 1) * 4); + Tcl_DStringSetLength(dsPtr, oldLength + (uniLength + 1) * 4); string = Tcl_DStringValue(dsPtr) + oldLength; p = string; wEnd = uniStr + uniLength; for (w = uniStr; w < wEnd; ) { - p += Tcl_UniCharToUtf(*w, p); + if (!len && ((*w & 0xFC00) != 0xDC00)) { + /* Special case for handling upper surrogates. */ + p += Tcl_UniCharToUtf(-1, p); + } + len = Tcl_UniCharToUtf(*w, p); + p += len; w++; } + if (!len) { + /* Special case for handling upper surrogates. */ + p += Tcl_UniCharToUtf(-1, p); + } Tcl_DStringSetLength(dsPtr, oldLength + (p - string)); return string; @@ -418,20 +434,32 @@ Tcl_UtfToUniCharDString( */ oldLength = Tcl_DStringLength(dsPtr); -/* TODO: fix overreach! */ + Tcl_DStringSetLength(dsPtr, - (int) ((oldLength + length + 1) * sizeof(Tcl_UniChar))); + oldLength + (int) ((length + 1) * sizeof(Tcl_UniChar))); wString = (Tcl_UniChar *) (Tcl_DStringValue(dsPtr) + oldLength); w = wString; - end = src + length; - for (p = src; p < end; ) { + p = src; + end = src + length - 4; + while (p < end) { p += TclUtfToUniChar(p, &ch); *w++ = ch; } + end += 4; + while (p < end) { + if (Tcl_UtfCharComplete(p, end-p)) { + p += TclUtfToUniChar(p, &ch); + } else if ((unsigned)((UCHAR(*p)-0x80)) < (unsigned) 0x20) { + ch = (Tcl_UniChar) cp1252[UCHAR(*p++)-0x80]; + } else { + ch = UCHAR(*p++); + } + *w++ = ch; + } *w = '\0'; Tcl_DStringSetLength(dsPtr, - (oldLength + ((char *) w - (char *) wString))); + oldLength + ((char *) w - (char *) wString)); return wString; } @@ -755,8 +783,8 @@ Tcl_UniCharAtIndex( * * Returns a pointer to the specified character (not byte) position in * the UTF-8 string. If TCL_UTF_MAX <= 4, characters > U+FFFF count as - * 2 positions, but then the pointer should never be placed between - * the two positions. + * 2 positions, but then the pointer should never be placed between + * the two positions. * * Results: * As above. @@ -892,7 +920,7 @@ Tcl_UtfToUpper( * char to dst if its size is <= the original char. */ - if (bytes < TclUtfCount(upChar)) { + if ((bytes < TclUtfCount(upChar)) || ((upChar & 0xF800) == 0xD800)) { memcpy(dst, src, (size_t) bytes); dst += bytes; } else { @@ -955,7 +983,7 @@ Tcl_UtfToLower( * char to dst if its size is <= the original char. */ - if (bytes < TclUtfCount(lowChar)) { + if ((bytes < TclUtfCount(lowChar)) || ((lowChar & 0xF800) == 0xD800)) { memcpy(dst, src, (size_t) bytes); dst += bytes; } else { @@ -1015,7 +1043,7 @@ Tcl_UtfToTitle( #endif titleChar = Tcl_UniCharToTitle(titleChar); - if (bytes < TclUtfCount(titleChar)) { + if ((bytes < TclUtfCount(titleChar)) || ((titleChar & 0xF800) == 0xD800)) { memcpy(dst, src, (size_t) bytes); dst += bytes; } else { @@ -1039,7 +1067,7 @@ Tcl_UtfToTitle( lowChar = Tcl_UniCharToLower(lowChar); } - if (bytes < TclUtfCount(lowChar)) { + if ((bytes < TclUtfCount(lowChar)) || ((lowChar & 0xF800) == 0xD800)) { memcpy(dst, src, (size_t) bytes); dst += bytes; } else { |
