summaryrefslogtreecommitdiffstats
path: root/generic
diff options
context:
space:
mode:
author jan.nijtmans <nijtmans@users.sourceforge.net> 2021-02-16 11:11:23 (GMT)
committer jan.nijtmans <nijtmans@users.sourceforge.net> 2021-02-16 11:11:23 (GMT)
commit 999decddb383b6dbe467570cb9e3997a48286fa2 (patch)
tree 7b22a7235f69cb59f2f2bdee892d7e27dbb8817f /generic
parent 3001f870632e5d152e76bbc599fea7b27d79b2af (diff)
downloadtcl-999decddb383b6dbe467570cb9e3997a48286fa2.zip
tcl-999decddb383b6dbe467570cb9e3997a48286fa2.tar.gz
tcl-999decddb383b6dbe467570cb9e3997a48286fa2.tar.bz2
Fix [22324bcbd]: string reverse is broken for Emoji. Thanks to Christian Werner for the bug report and POC patch.
Diffstat (limited to 'generic')
-rw-r--r-- generic/tclStringObj.c 44
1 files changed, 41 insertions, 3 deletions
diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c
index 33b2139..bdc9c99 100644
--- a/generic/tclStringObj.c
+++ b/generic/tclStringObj.c
@@ -2899,6 +2899,9 @@ TclStringReverse(
{
String *stringPtr;
Tcl_UniChar ch = 0;
+#if TCL_UTF_MAX <= 4
+ int needFlip = 0;
+#endif
if (TclIsPureByteArray(objPtr)) {
int numBytes;
@@ -2917,10 +2920,9 @@ TclStringReverse(
if (stringPtr->hasUnicode) {
Tcl_UniChar *from = Tcl_GetUnicode(objPtr);
Tcl_UniChar *src = from + stringPtr->numChars;
+ Tcl_UniChar *to;
if (Tcl_IsShared(objPtr)) {
- Tcl_UniChar *to;
-
/*
* Create a non-empty, pure unicode value, so we can coax
* Tcl_SetObjLength into growing the unicode rep buffer.
@@ -2930,19 +2932,54 @@ TclStringReverse(
Tcl_SetObjLength(objPtr, stringPtr->numChars);
to = Tcl_GetUnicode(objPtr);
while (--src >= from) {
+#if TCL_UTF_MAX <= 4
+ ch = *src;
+ if ((ch & 0xF800) == 0xD800) {
+ needFlip = 1;
+ }
+ *to++ = ch;
+#else
*to++ = *src;
+#endif
}
} else {
/*
* Reversing in place.
*/
+#if TCL_UTF_MAX <= 4
+ to = src;
+#endif
while (--src > from) {
ch = *src;
+#if TCL_UTF_MAX <= 4
+ if ((ch & 0xF800) == 0xD800) {
+ needFlip = 1;
+ }
+#endif
*src = *from;
*from++ = ch;
}
}
+#if TCL_UTF_MAX <= 4
+ if (needFlip) {
+ /*
+ * Flip back surrogate pairs.
+ */
+
+ from = to - stringPtr->numChars;
+ while (--to >= from) {
+ ch = *to;
+ if ((ch & 0xFC00) == 0xD800) {
+ if ((to-1 >= from) && ((to[-1] & 0xFC00) == 0xDC00)) {
+ to[0] = to[-1];
+ to[-1] = ch;
+ --to;
+ }
+ }
+ }
+ }
+#endif
}
if (objPtr->bytes) {
@@ -2968,6 +3005,7 @@ TclStringReverse(
int charCount = 0;
int bytesLeft = numBytes;
+ int chw;
while (bytesLeft) {
/*
@@ -2976,7 +3014,7 @@ TclStringReverse(
* skip calling Tcl_UtfCharComplete() here.
*/
- int bytesInChar = TclUtfToUniChar(from, &ch);
+ int bytesInChar = TclUtfToUCS4(from, &chw);
ReverseBytes((unsigned char *)to, (unsigned char *)from,
bytesInChar);