summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: jan.nijtmans <nijtmans@users.sourceforge.net> 2021-02-16 12:17:49 (GMT)
committer: jan.nijtmans <nijtmans@users.sourceforge.net> 2021-02-16 12:17:49 (GMT)
commit: 8844e2789a8e8d854f53069ea852ec5ef726757c (patch)
tree: 04640dd662a24ea83c7ac4075f99c6b444730fb0
parent: 94c40f8b7100eb40e174041600f31ec60c4b7bc2 (diff)
parent: 532506ea202974a816409de319b0a66d9173ce74 (diff)
download: tcl-8844e2789a8e8d854f53069ea852ec5ef726757c.zip
tcl-8844e2789a8e8d854f53069ea852ec5ef726757c.tar.gz
tcl-8844e2789a8e8d854f53069ea852ec5ef726757c.tar.bz2
Merge 8.7
-rw-r--r-- generic/tclStringObj.c 47
-rw-r--r-- tests/string.test 8
2 files changed, 45 insertions, 10 deletions
diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c
index bb718ce..4b44517 100644
--- a/generic/tclStringObj.c
+++ b/generic/tclStringObj.c
@@ -3809,6 +3809,9 @@ TclStringReverse(
String *stringPtr;
Tcl_UniChar ch = 0;
int inPlace = flags & TCL_STRING_IN_PLACE;
+#if TCL_UTF_MAX < 4
+ int needFlip = 0;
+#endif
if (TclIsPureByteArray(objPtr)) {
int numBytes;
@@ -3827,10 +3830,9 @@ TclStringReverse(
if (stringPtr->hasUnicode) {
Tcl_UniChar *from = Tcl_GetUnicode(objPtr);
Tcl_UniChar *src = from + stringPtr->numChars;
+ Tcl_UniChar *to;
if (!inPlace || Tcl_IsShared(objPtr)) {
- Tcl_UniChar *to;
-
/*
* Create a non-empty, pure unicode value, so we can coax
* Tcl_SetObjLength into growing the unicode rep buffer.
@@ -3840,19 +3842,54 @@ TclStringReverse(
Tcl_SetObjLength(objPtr, stringPtr->numChars);
to = Tcl_GetUnicode(objPtr);
while (--src >= from) {
+#if TCL_UTF_MAX < 4
+ ch = *src;
+ if ((ch & 0xF800) == 0xD800) {
+ needFlip = 1;
+ }
+ *to++ = ch;
+#else
*to++ = *src;
+#endif
}
} else {
/*
* Reversing in place.
*/
+#if TCL_UTF_MAX < 4
+ to = src;
+#endif
while (--src > from) {
ch = *src;
+#if TCL_UTF_MAX < 4
+ if ((ch & 0xF800) == 0xD800) {
+ needFlip = 1;
+ }
+#endif
*src = *from;
*from++ = ch;
}
}
+#if TCL_UTF_MAX < 4
+ if (needFlip) {
+ /*
+ * Flip back surrogate pairs.
+ */
+
+ from = to - stringPtr->numChars;
+ while (--to >= from) {
+ ch = *to;
+ if ((ch & 0xFC00) == 0xD800) {
+ if ((to-1 >= from) && ((to[-1] & 0xFC00) == 0xDC00)) {
+ to[0] = to[-1];
+ to[-1] = ch;
+ --to;
+ }
+ }
+ }
+ }
+#endif
}
if (objPtr->bytes) {
@@ -3876,8 +3913,8 @@ TclStringReverse(
* Pass 1. Reverse the bytes of each multi-byte character.
*/
- int charCount = 0;
int bytesLeft = numBytes;
+ int chw;
while (bytesLeft) {
/*
@@ -3886,18 +3923,16 @@ TclStringReverse(
* skip calling Tcl_UtfCharComplete() here.
*/
- int bytesInChar = TclUtfToUniChar(from, &ch);
+ int bytesInChar = TclUtfToUCS4(from, &chw);
ReverseBytes((unsigned char *)to, (unsigned char *)from,
bytesInChar);
to += bytesInChar;
from += bytesInChar;
bytesLeft -= bytesInChar;
- charCount++;
}
from = to = objPtr->bytes;
- stringPtr->numChars = charCount;
}
/* Pass 2. Reverse all the bytes. */
ReverseBytes((unsigned char *)to, (unsigned char *)from, numBytes);
diff --git a/tests/string.test b/tests/string.test
index b01d059..0eaa3da 100644
--- a/tests/string.test
+++ b/tests/string.test
@@ -2086,19 +2086,19 @@ test string-24.15.$noComp {string reverse command - pure bytearray} {
binary scan [run {tcl::string::reverse [binary format H* 010203]}] H* x
set x
} 030201
-test string-24.16.$noComp {string reverse command - surrogates} knownBug {
+test string-24.16.$noComp {string reverse command - surrogates} {
run {string reverse \u0444bulb\uD83D\uDE02}
} \uD83D\uDE02blub\u0444
-test string-24.17.$noComp {string reverse command - surrogates} knownBug {
+test string-24.17.$noComp {string reverse command - surrogates} {
run {string reverse \uD83D\uDE02hello\uD83D\uDE02}
} \uD83D\uDE02olleh\uD83D\uDE02
-test string-24.18.$noComp {string reverse command - surrogates} knownBug {
+test string-24.18.$noComp {string reverse command - surrogates} {
set s \u0444bulb\uD83D\uDE02
# shim shimmery ...
string index $s 0
run {string reverse $s}
} \uD83D\uDE02blub\u0444
-test string-24.19.$noComp {string reverse command - surrogates} knownBug {
+test string-24.19.$noComp {string reverse command - surrogates} {
set s \uD83D\uDE02hello\uD83D\uDE02
# shim shimmery ...
string index $s 0