summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorjan.nijtmans <nijtmans@users.sourceforge.net>2021-04-09 13:01:19 (GMT)
committerjan.nijtmans <nijtmans@users.sourceforge.net>2021-04-09 13:01:19 (GMT)
commit459073607da72a28bb34c120c5ac462ecd4cbe93 (patch)
tree4032c2f87e63cecf209179c4727965802345b7a5
parent0d7132ebb280216561e9dc82002d476155681934 (diff)
downloadtcl-459073607da72a28bb34c120c5ac462ecd4cbe93.zip
tcl-459073607da72a28bb34c120c5ac462ecd4cbe93.tar.gz
tcl-459073607da72a28bb34c120c5ac462ecd4cbe93.tar.bz2
Fix TclStringCat() functions. This makes allmost all testcases pass, one left to go: utf-1.18
-rw-r--r--generic/tclStringObj.c27
-rw-r--r--tests/append.test5
-rw-r--r--tests/utf.test38
3 files changed, 45 insertions, 25 deletions
diff --git a/generic/tclStringObj.c b/generic/tclStringObj.c
index e9f8c11..78a47e3 100644
--- a/generic/tclStringObj.c
+++ b/generic/tclStringObj.c
@@ -3048,7 +3048,7 @@ TclStringCat(
{
Tcl_Obj *objResultPtr, * const *ov;
int oc, length = 0, binary = 1;
- int allowUniChar = 1, requestUniChar = 0;
+ int allowUniChar = 1, requestUniChar = 0, forceUniChar = 0;
int first = objc - 1; /* Index of first value possibly not empty */
int last = 0; /* Index of last value possibly not empty */
int inPlace = flags & TCL_STRING_IN_PLACE;
@@ -3084,7 +3084,9 @@ TclStringCat(
*/
binary = 0;
- if ((objPtr->typePtr) && (objPtr->typePtr != &tclStringType)) {
+ if (ov > objv+1 && ISCONTINUATION(objPtr->bytes)) {
+ forceUniChar = 1;
+ } else if ((objPtr->typePtr) && (objPtr->typePtr != &tclStringType)) {
/* Prevent shimmer of non-string types. */
allowUniChar = 0;
}
@@ -3133,7 +3135,7 @@ TclStringCat(
}
}
} while (--oc);
- } else if (allowUniChar && requestUniChar) {
+ } else if ((allowUniChar && requestUniChar) || forceUniChar) {
/*
* Result will be pure Tcl_UniChar array. Pre-size it.
*/
@@ -3286,7 +3288,7 @@ TclStringCat(
dst += more;
}
}
- } else if (allowUniChar && requestUniChar) {
+ } else if ((allowUniChar && requestUniChar) || forceUniChar) {
/* Efficiently produce a pure Tcl_UniChar array result */
Tcl_UniChar *dst;
@@ -4161,9 +4163,22 @@ ExtendUnicodeRepWithString(
} else {
numAppendChars = 0;
}
- for (dst=stringPtr->unicode + numOrigChars; numAppendChars-- > 0; dst++) {
+ dst = stringPtr->unicode + numOrigChars;
+ if (numAppendChars-- > 0) {
bytes += TclUtfToUniChar(bytes, &unichar);
- *dst = unichar;
+#if TCL_UTF_MAX > 3
+ /* join upper/lower surrogate */
+ if (bytes && (stringPtr->unicode[numOrigChars - 1] | 0x3FF) == 0xDBFF && (unichar | 0x3FF) == 0xDFFF) {
+ stringPtr->numChars--;
+ unichar = ((stringPtr->unicode[numOrigChars - 1] & 0x3FF) << 10) + (unichar & 0x3FF) + 0x10000;
+ dst--;
+ }
+#endif
+ *dst++ = unichar;
+ while (numAppendChars-- > 0) {
+ bytes += TclUtfToUniChar(bytes, &unichar);
+ *dst++ = unichar;
+ }
}
*dst = 0;
}
diff --git a/tests/append.test b/tests/append.test
index 057410a..7d895b7 100644
--- a/tests/append.test
+++ b/tests/append.test
@@ -77,6 +77,11 @@ test append-3.9 {append \xC0 \x80} -body {
set x [testbytestring \x80]
string length [testbytestring \xC0]$x
} -result 2
+test append-3.10 {append surrogates} -body {
+ set x \uD83D
+ string range $x 0 end
+ append x \uDE02
+} -result [string range \uD83D\uDE02 0 end]
test append-4.1 {lappend command} {
unset -nocomplain x
diff --git a/tests/utf.test b/tests/utf.test
index 6ae8d18..477216c 100644
--- a/tests/utf.test
+++ b/tests/utf.test
@@ -223,7 +223,7 @@ test utf-6.3 {Tcl_UtfNext} testutfnext {
testutfnext AA
} 1
test utf-6.4 {Tcl_UtfNext} {testutfnext testbytestring} {
- testutfnext A[testbytestring \xA0]
+ testutfnext [testbytestring A\xA0]
} 1
test utf-6.5 {Tcl_UtfNext} {testutfnext testbytestring} {
testutfnext A[testbytestring \xD0]
@@ -409,7 +409,7 @@ test utf-6.62 {Tcl_UtfNext} testutfnext {
testutfnext 蠠G
} 3
test utf-6.63 {Tcl_UtfNext} {testutfnext testbytestring} {
- testutfnext 蠠[testbytestring \xA0]
+ testutfnext [testbytestring \xE8\xA0\xA0\xA0]
} 3
test utf-6.64 {Tcl_UtfNext} {testutfnext testbytestring} {
testutfnext 蠠[testbytestring \xD0]
@@ -584,7 +584,7 @@ test utf-7.6 {Tcl_UtfPrev} {testutfprev testbytestring} {
testutfprev A[testbytestring \xE8]
} 1
test utf-7.6.1 {Tcl_UtfPrev} {testutfprev testbytestring} {
- testutfprev A蠠[testbytestring \xA0] 2
+ testutfprev A[testbytestring \xE8\xA0\xA0\xA0] 2
} 1
test utf-7.6.2 {Tcl_UtfPrev} {testutfprev testbytestring} {
testutfprev A[testbytestring \xE8\xF8\xA0\xA0] 2
@@ -599,13 +599,13 @@ test utf-7.7.2 {Tcl_UtfPrev} {testutfprev testbytestring} {
testutfprev A[testbytestring \xD0\xF8\xA0\xA0] 2
} 1
test utf-7.8 {Tcl_UtfPrev} {testutfprev testbytestring} {
- testutfprev A[testbytestring \xA0]
+ testutfprev [testbytestring A\xA0]
} 1
test utf-7.8.1 {Tcl_UtfPrev} {testutfprev testbytestring} {
- testutfprev A[testbytestring \xA0\xA0\xA0\xA0] 2
+ testutfprev [testbytestring A\xA0\xA0\xA0\xA0] 2
} 1
test utf-7.8.2 {Tcl_UtfPrev} {testutfprev testbytestring} {
- testutfprev A[testbytestring \xA0\xF8\xA0\xA0] 2
+ testutfprev [testbytestring A\xA0\xF8\xA0\xA0] 2
} 1
test utf-7.9 {Tcl_UtfPrev} {testutfprev testbytestring} {
testutfprev A[testbytestring \xF8\xA0]
@@ -638,7 +638,7 @@ test utf-7.11 {Tcl_UtfPrev} {testutfprev testbytestring} {
testutfprev A[testbytestring \xE8\xA0]
} 1
test utf-7.11.1 {Tcl_UtfPrev} {testutfprev testbytestring} {
- testutfprev A蠠[testbytestring \xA0] 3
+ testutfprev A[testbytestring \xE8\xA0\xA0\xA0] 3
} 1
test utf-7.11.2 {Tcl_UtfPrev} {testutfprev testbytestring} {
testutfprev A[testbytestring \xE8\xA0\xF8\xA0] 3
@@ -656,13 +656,13 @@ test utf-7.12.2 {Tcl_UtfPrev} {testutfprev testbytestring} {
testutfprev A[testbytestring \xD0\xA0\xF8\xA0] 3
} 1
test utf-7.13 {Tcl_UtfPrev} {testutfprev testbytestring} {
- testutfprev A[testbytestring \xA0\xA0]
+ testutfprev [testbytestring A\xA0\xA0]
} 2
test utf-7.13.1 {Tcl_UtfPrev} {testutfprev testbytestring} {
- testutfprev A[testbytestring \xA0\xA0\xA0\xA0] 3
+ testutfprev [testbytestring A\xA0\xA0\xA0\xA0] 3
} 2
test utf-7.13.2 {Tcl_UtfPrev} {testutfprev testbytestring} {
- testutfprev A[testbytestring \xA0\xA0\xF8\xA0] 3
+ testutfprev [testbytestring A\xA0\xA0\xF8\xA0] 3
} 2
test utf-7.14 {Tcl_UtfPrev} {testutfprev testbytestring} {
testutfprev A[testbytestring \xF8\xA0\xA0]
@@ -695,7 +695,7 @@ test utf-7.16 {Tcl_UtfPrev} testutfprev {
testutfprev A蠠
} 1
test utf-7.16.1 {Tcl_UtfPrev} {testutfprev testbytestring} {
- testutfprev A蠠[testbytestring \xA0] 4
+ testutfprev A[testbytestring \xE8\xA0\xA0\xA0] 4
} 1
test utf-7.16.2 {Tcl_UtfPrev} {testutfprev testbytestring} {
testutfprev A蠠[testbytestring \xF8] 4
@@ -710,31 +710,31 @@ test utf-7.17.2 {Tcl_UtfPrev} {testutfprev testbytestring} {
testutfprev A[testbytestring \xD0\xA0\xA0\xF8] 4
} 3
test utf-7.18.0 {Tcl_UtfPrev} {testutfprev testbytestring} {
- testutfprev A[testbytestring \xA0\xA0\xA0]
+ testutfprev [testbytestring A\xA0\xA0\xA0]
} 3
test utf-7.18.1 {Tcl_UtfPrev} {testutfprev testbytestring} {
- testutfprev A[testbytestring \xA0\xA0\xA0\xA0] 4
+ testutfprev [testbytestring A\xA0\xA0\xA0\xA0] 4
} 3
test utf-7.18.2 {Tcl_UtfPrev} {testutfprev testbytestring} {
- testutfprev A[testbytestring \xA0\xA0\xA0\xF8] 4
+ testutfprev [testbytestring A\xA0\xA0\xA0\xF8] 4
} 3
test utf-7.19 {Tcl_UtfPrev} {testutfprev testbytestring} {
- testutfprev A[testbytestring \xF8\xA0\xA0\xA0]
+ testutfprev [testbytestring A\xF8\xA0\xA0\xA0]
} 4
test utf-7.20.0 {Tcl_UtfPrev} {testutfprev testbytestring ucs2} {
- testutfprev A[testbytestring \xF2\xA0\xA0\xA0]
+ testutfprev [testbytestring A\xF2\xA0\xA0\xA0]
} 4
test utf-7.20.1 {Tcl_UtfPrev} {testutfprev testbytestring fullutf} {
- testutfprev A[testbytestring \xF2\xA0\xA0\xA0]
+ testutfprev [testbytestring A\xF2\xA0\xA0\xA0]
} 1
test utf-7.21 {Tcl_UtfPrev} {testutfprev testbytestring} {
- testutfprev A蠠[testbytestring \xA0]
+ testutfprev A[testbytestring \xE8\xA0\xA0\xA0]
} 4
test utf-7.22 {Tcl_UtfPrev} {testutfprev testbytestring} {
testutfprev A[testbytestring \xD0\xA0\xA0\xA0]
} 4
test utf-7.23 {Tcl_UtfPrev} {testutfprev testbytestring} {
- testutfprev A[testbytestring \xA0\xA0\xA0\xA0]
+ testutfprev [testbytestring A\xA0\xA0\xA0\xA0]
} 4
test utf-7.24 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} {
testutfprev A[testbytestring \xC0\x81]