diff options
author | jan.nijtmans <nijtmans@users.sourceforge.net> | 2021-02-16 11:11:23 (GMT) |
---|---|---|
committer | jan.nijtmans <nijtmans@users.sourceforge.net> | 2021-02-16 11:11:23 (GMT) |
commit | 999decddb383b6dbe467570cb9e3997a48286fa2 (patch) | |
tree | 7b22a7235f69cb59f2f2bdee892d7e27dbb8817f /generic | |
parent | 3001f870632e5d152e76bbc599fea7b27d79b2af (diff) | |
download | tcl-999decddb383b6dbe467570cb9e3997a48286fa2.zip tcl-999decddb383b6dbe467570cb9e3997a48286fa2.tar.gz tcl-999decddb383b6dbe467570cb9e3997a48286fa2.tar.bz2 |
Fix [22324bcbd]: string reverse is broken for Emoji. Thanks to Christian Werner for bug report and POC patch.
Diffstat (limited to 'generic')
-rw-r--r-- | generic/tclStringObj.c | 44 |
1 file changed, 41 insertions, 3 deletions
diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c index 33b2139..bdc9c99 100644 --- a/generic/tclStringObj.c +++ b/generic/tclStringObj.c @@ -2899,6 +2899,9 @@ TclStringReverse( { String *stringPtr; Tcl_UniChar ch = 0; +#if TCL_UTF_MAX <= 4 + int needFlip = 0; +#endif if (TclIsPureByteArray(objPtr)) { int numBytes; @@ -2917,10 +2920,9 @@ TclStringReverse( if (stringPtr->hasUnicode) { Tcl_UniChar *from = Tcl_GetUnicode(objPtr); Tcl_UniChar *src = from + stringPtr->numChars; + Tcl_UniChar *to; if (Tcl_IsShared(objPtr)) { - Tcl_UniChar *to; - /* * Create a non-empty, pure unicode value, so we can coax * Tcl_SetObjLength into growing the unicode rep buffer. @@ -2930,19 +2932,54 @@ TclStringReverse( Tcl_SetObjLength(objPtr, stringPtr->numChars); to = Tcl_GetUnicode(objPtr); while (--src >= from) { +#if TCL_UTF_MAX <= 4 + ch = *src; + if ((ch & 0xF800) == 0xD800) { + needFlip = 1; + } + *to++ = ch; +#else *to++ = *src; +#endif } } else { /* * Reversing in place. */ +#if TCL_UTF_MAX <= 4 + to = src; +#endif while (--src > from) { ch = *src; +#if TCL_UTF_MAX <= 4 + if ((ch & 0xF800) == 0xD800) { + needFlip = 1; + } +#endif *src = *from; *from++ = ch; } } +#if TCL_UTF_MAX <= 4 + if (needFlip) { + /* + * Flip back surrogate pairs. + */ + + from = to - stringPtr->numChars; + while (--to >= from) { + ch = *to; + if ((ch & 0xFC00) == 0xD800) { + if ((to-1 >= from) && ((to[-1] & 0xFC00) == 0xDC00)) { + to[0] = to[-1]; + to[-1] = ch; + --to; + } + } + } + } +#endif } if (objPtr->bytes) { @@ -2968,6 +3005,7 @@ TclStringReverse( int charCount = 0; int bytesLeft = numBytes; + int chw; while (bytesLeft) { /* @@ -2976,7 +3014,7 @@ TclStringReverse( * skip calling Tcl_UtfCharComplete() here. */ - int bytesInChar = TclUtfToUniChar(from, &ch); + int bytesInChar = TclUtfToUCS4(from, &chw); ReverseBytes((unsigned char *)to, (unsigned char *)from, bytesInChar); |