summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: jan.nijtmans <nijtmans@users.sourceforge.net> 2020-04-17 10:32:52 (GMT)
committer: jan.nijtmans <nijtmans@users.sourceforge.net> 2020-04-17 10:32:52 (GMT)
commit: 0c272ad2ce2e910b0642d1a2691ddb19e63291fb (patch)
tree: a612a54786dbdd46b99c7fde24a183fc5b1d93e6
parent: 98700eaf2b0d54d262c427dd5dd30133e6a14fbd (diff)
parent: b4ecc6b747780c309fbf05d525ecb3c5f74f3ff2 (diff)
download: tcl-0c272ad2ce2e910b0642d1a2691ddb19e63291fb.zip
tcl-0c272ad2ce2e910b0642d1a2691ddb19e63291fb.tar.gz
tcl-0c272ad2ce2e910b0642d1a2691ddb19e63291fb.tar.bz2
More test-cases. Mark test-case utf-2.11 as "knownBug": it doesn't give the right answer for any TCL_UTF_MAX value. TODO: To Be Fixed! (Don ????)
Fix build/testcase for TCL_UTF_MAX=6 (testcase is OK, "string length" implementation was not!)
-rw-r--r-- generic/tclUtf.c 12
-rw-r--r-- tests/utf.test 31
2 files changed, 41 insertions, 2 deletions
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index 4b5d500..94fa5f6 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -598,7 +598,11 @@ Tcl_NumUtfChars(
if (length < 0) {
while ((*src != '\0') && (i < INT_MAX)) {
next = TclUtfNext(src);
+#if TCL_UTF_MAX > 4
+ i++;
+#else
i += 1 + ((next - src) > 3);
+#endif
src = next;
}
} else {
@@ -606,13 +610,21 @@ Tcl_NumUtfChars(
while (src < endPtr) {
next = TclUtfNext(src);
+#if TCL_UTF_MAX > 4
+ i++;
+#else
i += 1 + ((next - src) > 3);
+#endif
src = next;
}
endPtr += /*TCL_UTF_MAX*/ 4;
while ((src < endPtr) && Tcl_UtfCharComplete(src, endPtr - src)) {
next = TclUtfNext(src);
+#if TCL_UTF_MAX > 4
+ i++;
+#else
i += 1 + ((next - src) > 3);
+#endif
src = next;
}
if (src < endPtr) {
diff --git a/tests/utf.test b/tests/utf.test
index c584ea1..0ba2b85 100644
--- a/tests/utf.test
+++ b/tests/utf.test
@@ -94,9 +94,9 @@ test utf-2.9 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail} -constraints
test utf-2.10 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail, underflow} testbytestring {
string length [testbytestring "\xF0\x8F\xBF\xBF"]
} {4}
-test utf-2.11 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail, overflow} testbytestring {
+test utf-2.11 {Tcl_UtfToUniChar: lead (4-byte) followed by 3 trail, overflow} {testbytestring knownBug} {# Doesn't work with any TCL_UTF_MAX value
string length [testbytestring "\xF4\x90\x80\x80"]
-} {2}
+} {4}
test utf-2.12 {Tcl_UtfToUniChar: longer UTF sequences not supported} testbytestring {
string length [testbytestring "\xF8\xA2\xA2\xA2\xA2"]
} {5}
@@ -419,8 +419,17 @@ test utf-6.87 {Tcl_UtfNext - overlong sequences} {testutfnext} {
testutfnext \xF0\x90\x80\x80
} 4
test utf-6.88 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} {testutfnext} {
+ testutfnext \xA0\xA0
+} 1
+test utf-6.88.1 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} {testutfnext} {
testutfnext \xE8\xA0\xA0 1
} 2
+test utf-6.89 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} {testutfnext} {
+ testutfnext \x80\x80
+} 1
+test utf-6.89.1 {Tcl_UtfNext, pointing to 2th byte of 3-byte invalid sequence} {testutfnext} {
+ testutfnext \xF0\x80\x80 1
+} 2
testConstraint testutfprev [llength [info commands testutfprev]]
@@ -505,6 +514,9 @@ test utf-7.11.1 {Tcl_UtfPrev} testutfprev {
test utf-7.11.2 {Tcl_UtfPrev} testutfprev {
testutfprev A\xE8\xA0\xF8\xA0 3
} 1
+test utf-7.11.3 {Tcl_UtfPrev} testutfprev {
+ testutfprev A\xE8\xA0\xF8 3
+} 1
test utf-7.12 {Tcl_UtfPrev} testutfprev {
testutfprev A\xD0\xA0
} 1
@@ -593,9 +605,15 @@ test utf-7.26 {Tcl_UtfPrev -- overlong sequence} testutfprev {
testutfprev A\xE0\x80\x80
} 3
test utf-7.27 {Tcl_UtfPrev -- overlong sequence} testutfprev {
+ testutfprev A\xE0\x80
+} 2
+test utf-7.27.1 {Tcl_UtfPrev -- overlong sequence} testutfprev {
testutfprev A\xE0\x80\x80 3
} 2
test utf-7.28 {Tcl_UtfPrev -- overlong sequence} testutfprev {
+ testutfprev A\xE0
+} 1
+test utf-7.28.1 {Tcl_UtfPrev -- overlong sequence} testutfprev {
testutfprev A\xE0\x80\x80 2
} 1
test utf-7.29 {Tcl_UtfPrev -- overlong sequence} testutfprev {
@@ -652,6 +670,15 @@ test utf-7.45 {Tcl_UtfPrev -- no lead byte at start} testutfprev {
test utf-7.46 {Tcl_UtfPrev -- no lead byte at start} testutfprev {
testutfprev \xA0\xA0\xA0\xA0
} 3
+test utf-7.47 {Tcl_UtfPrev, pointing to 3th byte of 3-byte valid sequence} {testutfprev} {
+ testutfprev \xE8\xA0
+} 0
+test utf-7.47.1 {Tcl_UtfPrev, pointing to 3th byte of 3-byte valid sequence} {testutfprev} {
+ testutfprev \xE8\xA0\xA0 2
+} 0
+test utf-7.47.2 {Tcl_UtfPrev, pointing to 3th byte of 3-byte invalid sequence} {testutfprev} {
+ testutfprev \xE8\xA0\x00 2
+} 0
test utf-8.1 {Tcl_UniCharAtIndex: index = 0} {
string index abcd 0