diff options
author | jan.nijtmans <nijtmans@users.sourceforge.net> | 2021-02-16 12:17:49 (GMT) |
---|---|---|
committer | jan.nijtmans <nijtmans@users.sourceforge.net> | 2021-02-16 12:17:49 (GMT) |
commit | 8844e2789a8e8d854f53069ea852ec5ef726757c (patch) | |
tree | 04640dd662a24ea83c7ac4075f99c6b444730fb0 | |
parent | 94c40f8b7100eb40e174041600f31ec60c4b7bc2 (diff) | |
parent | 532506ea202974a816409de319b0a66d9173ce74 (diff) | |
download | tcl-8844e2789a8e8d854f53069ea852ec5ef726757c.zip tcl-8844e2789a8e8d854f53069ea852ec5ef726757c.tar.gz tcl-8844e2789a8e8d854f53069ea852ec5ef726757c.tar.bz2 |
Merge 8.7
-rw-r--r-- | generic/tclStringObj.c | 47 | ||||
-rw-r--r-- | tests/string.test | 8 |
2 files changed, 45 insertions, 10 deletions
diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c index bb718ce..4b44517 100644 --- a/generic/tclStringObj.c +++ b/generic/tclStringObj.c @@ -3809,6 +3809,9 @@ TclStringReverse( String *stringPtr; Tcl_UniChar ch = 0; int inPlace = flags & TCL_STRING_IN_PLACE; +#if TCL_UTF_MAX < 4 + int needFlip = 0; +#endif if (TclIsPureByteArray(objPtr)) { int numBytes; @@ -3827,10 +3830,9 @@ TclStringReverse( if (stringPtr->hasUnicode) { Tcl_UniChar *from = Tcl_GetUnicode(objPtr); Tcl_UniChar *src = from + stringPtr->numChars; + Tcl_UniChar *to; if (!inPlace || Tcl_IsShared(objPtr)) { - Tcl_UniChar *to; - /* * Create a non-empty, pure unicode value, so we can coax * Tcl_SetObjLength into growing the unicode rep buffer. @@ -3840,19 +3842,54 @@ TclStringReverse( Tcl_SetObjLength(objPtr, stringPtr->numChars); to = Tcl_GetUnicode(objPtr); while (--src >= from) { +#if TCL_UTF_MAX < 4 + ch = *src; + if ((ch & 0xF800) == 0xD800) { + needFlip = 1; + } + *to++ = ch; +#else *to++ = *src; +#endif } } else { /* * Reversing in place. */ +#if TCL_UTF_MAX < 4 + to = src; +#endif while (--src > from) { ch = *src; +#if TCL_UTF_MAX < 4 + if ((ch & 0xF800) == 0xD800) { + needFlip = 1; + } +#endif *src = *from; *from++ = ch; } } +#if TCL_UTF_MAX < 4 + if (needFlip) { + /* + * Flip back surrogate pairs. + */ + + from = to - stringPtr->numChars; + while (--to >= from) { + ch = *to; + if ((ch & 0xFC00) == 0xD800) { + if ((to-1 >= from) && ((to[-1] & 0xFC00) == 0xDC00)) { + to[0] = to[-1]; + to[-1] = ch; + --to; + } + } + } + } +#endif } if (objPtr->bytes) { @@ -3876,8 +3913,8 @@ TclStringReverse( * Pass 1. Reverse the bytes of each multi-byte character. */ - int charCount = 0; int bytesLeft = numBytes; + int chw; while (bytesLeft) { /* @@ -3886,18 +3923,16 @@ TclStringReverse( * skip calling Tcl_UtfCharComplete() here. */ - int bytesInChar = TclUtfToUniChar(from, &ch); + int bytesInChar = TclUtfToUCS4(from, &chw); ReverseBytes((unsigned char *)to, (unsigned char *)from, bytesInChar); to += bytesInChar; from += bytesInChar; bytesLeft -= bytesInChar; - charCount++; } from = to = objPtr->bytes; - stringPtr->numChars = charCount; } /* Pass 2. Reverse all the bytes. */ ReverseBytes((unsigned char *)to, (unsigned char *)from, numBytes); diff --git a/tests/string.test b/tests/string.test index b01d059..0eaa3da 100644 --- a/tests/string.test +++ b/tests/string.test @@ -2086,19 +2086,19 @@ test string-24.15.$noComp {string reverse command - pure bytearray} { binary scan [run {tcl::string::reverse [binary format H* 010203]}] H* x set x } 030201 -test string-24.16.$noComp {string reverse command - surrogates} knownBug { +test string-24.16.$noComp {string reverse command - surrogates} { run {string reverse \u0444bulb\uD83D\uDE02} } \uD83D\uDE02blub\u0444 -test string-24.17.$noComp {string reverse command - surrogates} knownBug { +test string-24.17.$noComp {string reverse command - surrogates} { run {string reverse \uD83D\uDE02hello\uD83D\uDE02} } \uD83D\uDE02olleh\uD83D\uDE02 -test string-24.18.$noComp {string reverse command - surrogates} knownBug { +test string-24.18.$noComp {string reverse command - surrogates} { set s \u0444bulb\uD83D\uDE02 # shim shimmery ... string index $s 0 run {string reverse $s} } \uD83D\uDE02blub\u0444 -test string-24.19.$noComp {string reverse command - surrogates} knownBug { +test string-24.19.$noComp {string reverse command - surrogates} { set s \uD83D\uDE02hello\uD83D\uDE02 # shim shimmery ... string index $s 0 |