summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- generic/tclUtf.c 12
-rw-r--r-- tests/utf.test 31
2 files changed, 41 insertions, 2 deletions
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index 4b5d500..94fa5f6 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -598,7 +598,11 @@ Tcl_NumUtfChars(
if (length < 0) {
while ((*src != '\0') && (i < INT_MAX)) {
next = TclUtfNext(src);
+#if TCL_UTF_MAX > 4
+ i++;
+#else
i += 1 + ((next - src) > 3);
+#endif
src = next;
}
} else {
@@ -606,13 +610,21 @@ Tcl_NumUtfChars(
while (src < endPtr) {
next = TclUtfNext(src);
+#if TCL_UTF_MAX > 4
+ i++;
+#else
i += 1 + ((next - src) > 3);
+#endif
src = next;
}
endPtr += /*TCL_UTF_MAX*/ 4;
while ((src < endPtr) && Tcl_UtfCharComplete(src, endPtr - src)) {
next = TclUtfNext(src);
+#if TCL_UTF_MAX > 4
+ i++;
+#else
i += 1 + ((next - src) > 3);
+#endif
src = next;
}
if (src < endPtr) {
diff --git a/tests/utf.test b/tests/utf.test
index c584ea1..0ba2b85 100644
--- a/tests/utf.test
+++ b/tests/utf.test
@@ -94,9 +94,9 @@ test utf-2.9 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} -constraints
test utf-2.10 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail, underflow} testbytestring {
string length [testbytestring "\xF0\x8F\xBF\xBF"]
} {4}
-test utf-2.11 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail, overflow} testbytestring {
+test utf-2.11 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail, overflow} {testbytestring knownBug} {# Doesn't work with any TCL_UTF_MAX value
string length [testbytestring "\xF4\x90\x80\x80"]
-} {2}
+} {4}
test utf-2.12 {Tcl_UtfToUniChar: longer UTF sequences not supported} testbytestring {
string length [testbytestring "\xF8\xA2\xA2\xA2\xA2"]
} {5}
@@ -419,8 +419,17 @@ test utf-6.87 {Tcl_UtfNext - overlong sequences} {testutfnext} {
testutfnext \xF0\x90\x80\x80
} 4
test utf-6.88 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} {testutfnext} {
+ testutfnext \xA0\xA0
+} 1
+test utf-6.88.1 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} {testutfnext} {
testutfnext \xE8\xA0\xA0 1
} 2
+test utf-6.89 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} {testutfnext} {
+ testutfnext \x80\x80
+} 1
+test utf-6.89.1 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} {testutfnext} {
+ testutfnext \xF0\x80\x80 1
+} 2
testConstraint testutfprev [llength [info commands testutfprev]]
@@ -505,6 +514,9 @@ test utf-7.11.1 {Tcl_UtfPrev} testutfprev {
test utf-7.11.2 {Tcl_UtfPrev} testutfprev {
testutfprev A\xE8\xA0\xF8\xA0 3
} 1
+test utf-7.11.3 {Tcl_UtfPrev} testutfprev {
+ testutfprev A\xE8\xA0\xF8 3
+} 1
test utf-7.12 {Tcl_UtfPrev} testutfprev {
testutfprev A\xD0\xA0
} 1
@@ -593,9 +605,15 @@ test utf-7.26 {Tcl_UtfPrev -- overlong sequence} testutfprev {
testutfprev A\xE0\x80\x80
} 3
test utf-7.27 {Tcl_UtfPrev -- overlong sequence} testutfprev {
+ testutfprev A\xE0\x80
+} 2
+test utf-7.27.1 {Tcl_UtfPrev -- overlong sequence} testutfprev {
testutfprev A\xE0\x80\x80 3
} 2
test utf-7.28 {Tcl_UtfPrev -- overlong sequence} testutfprev {
+ testutfprev A\xE0
+} 1
+test utf-7.28.1 {Tcl_UtfPrev -- overlong sequence} testutfprev {
testutfprev A\xE0\x80\x80 2
} 1
test utf-7.29 {Tcl_UtfPrev -- overlong sequence} testutfprev {
@@ -652,6 +670,15 @@ test utf-7.45 {Tcl_UtfPrev -- no lead byte at start} testutfprev {
test utf-7.46 {Tcl_UtfPrev -- no lead byte at start} testutfprev {
testutfprev \xA0\xA0\xA0\xA0
} 3
+test utf-7.47 {Tcl_UtfPrev, pointing to 3th byte of 3-byte valid sequence} {testutfprev} {
+ testutfprev \xE8\xA0
+} 0
+test utf-7.47.1 {Tcl_UtfPrev, pointing to 3th byte of 3-byte valid sequence} {testutfprev} {
+ testutfprev \xE8\xA0\xA0 2
+} 0
+test utf-7.47.2 {Tcl_UtfPrev, pointing to 3th byte of 3-byte invalid sequence} {testutfprev} {
+ testutfprev \xE8\xA0\x00 2
+} 0
test utf-8.1 {Tcl_UniCharAtIndex: index = 0} {
string index abcd 0