summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: jan.nijtmans <nijtmans@users.sourceforge.net> 2020-04-20 15:20:55 (GMT)
committer: jan.nijtmans <nijtmans@users.sourceforge.net> 2020-04-20 15:20:55 (GMT)
commit: 4f2621d9d59b2df9183fc4a90bb530dbccd18fc2 (patch)
tree: 89d59c07d835e1eee3c358e7f8aca4f6915b807a
parent: effab20437479dde3420488467f7ab773743d849 (diff)
download: tcl-4f2621d9d59b2df9183fc4a90bb530dbccd18fc2.zip
tcl-4f2621d9d59b2df9183fc4a90bb530dbccd18fc2.tar.gz
tcl-4f2621d9d59b2df9183fc4a90bb530dbccd18fc2.tar.bz2
Proposed fix for [c11e0c5ce4]: Regression in Tcl_UtfCharComplete.
-rw-r--r-- generic/tclUtf.c 32
-rw-r--r-- tests/utf.test 136
2 files changed, 123 insertions, 45 deletions
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index aa949ca..842744d 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -64,17 +64,6 @@ static const unsigned char totalBytes[256] = {
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
- 2,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,1,1,1,1,1,1,1,1,1,1,1
-};
-
-static const unsigned char complete[256] = {
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
#if TCL_UTF_MAX > 4
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
@@ -84,7 +73,11 @@ static const unsigned char complete[256] = {
#endif
2,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
- 4,4,4,4,4,
+#if TCL_UTF_MAX > 3
+ 4,4,4,4,4,
+#else
+ 1,1,1,1,1,
+#endif
1,1,1,1,1,1,1,1,1,1,1
};
@@ -558,7 +551,7 @@ Tcl_UtfCharComplete(
* a complete UTF-8 character. */
int length) /* Length of above string in bytes. */
{
- return length >= complete[(unsigned char)*src];
+ return length >= totalBytes[(unsigned char)*src];
}
/*
@@ -606,7 +599,7 @@ Tcl_NumUtfChars(
src = next;
}
} else {
- register const char *endPtr = src + length - /*TCL_UTF_MAX*/ 4;
+ register const char *endPtr = src + length - TCL_UTF_MAX;
while (src < endPtr) {
next = TclUtfNext(src);
@@ -617,7 +610,7 @@ Tcl_NumUtfChars(
#endif
src = next;
}
- endPtr += /*TCL_UTF_MAX*/ 4;
+ endPtr += TCL_UTF_MAX;
while ((src < endPtr) && Tcl_UtfCharComplete(src, endPtr - src)) {
next = TclUtfNext(src);
#if TCL_UTF_MAX > 4
@@ -895,15 +888,18 @@ Tcl_UtfPrev(
/* Continue the search backwards... */
look--;
- } while (trailBytesSeen < /* was TCL_UTF_MAX */ 4);
+ } while (trailBytesSeen < TCL_UTF_MAX);
/*
- * We've seen 4 (was TCL_UTF_MAX) trail bytes, so we know there will not be a
+ * We've seen TCL_UTF_MAX trail bytes, so we know there will not be a
* properly formed byte sequence to find, and we can stop looking,
* accepting the fallback.
*/
-
+#if TCL_UTF_MAX < 4
+ return src - TCL_UTF_MAX;
+#else
return fallback;
+#endif
}
/*
diff --git a/tests/utf.test b/tests/utf.test
index f56fabc..3301dde 100644
--- a/tests/utf.test
+++ b/tests/utf.test
@@ -21,6 +21,7 @@ testConstraint testbytestring [llength [info commands testbytestring]]
catch {unset x}
# Some tests require support for 4-byte UTF-8 sequences
+testConstraint smallutf [expr {[format %c 0x010000] == "\uFFFD"}]
testConstraint fullutf [expr {[format %c 0x010000] != "\uFFFD"}]
testConstraint tip389 [expr {[string length \U010000] == 2}]
@@ -361,7 +362,10 @@ test utf-6.67 {Tcl_UtfNext} testutfnext {
test utf-6.68 {Tcl_UtfNext} testutfnext {
testutfnext \xF4\xA0\xA0G
} 1
-test utf-6.69 {Tcl_UtfNext} testutfnext {
+test utf-6.69 {Tcl_UtfNext} {testutfnext smallutf} {
+ testutfnext \xF4\xA0\xA0\xA0
+} 1
+test utf-6.69.1 {Tcl_UtfNext} {testutfnext fullutf} {
testutfnext \xF4\xA0\xA0\xA0
} 4
test utf-6.70 {Tcl_UtfNext} testutfnext {
@@ -376,22 +380,40 @@ test utf-6.71 {Tcl_UtfNext} testutfnext {
test utf-6.73 {Tcl_UtfNext} testutfnext {
testutfnext \xF4\xA0\xA0\xF8
} 1
-test utf-6.74 {Tcl_UtfNext} testutfnext {
+test utf-6.74 {Tcl_UtfNext} {testutfnext smallutf} {
+ testutfnext \xF4\xA0\xA0\xA0G
+} 1
+test utf-6.74.1 {Tcl_UtfNext} {testutfnext fullutf} {
testutfnext \xF4\xA0\xA0\xA0G
} 4
-test utf-6.75 {Tcl_UtfNext} testutfnext {
+test utf-6.75 {Tcl_UtfNext} {testutfnext smallutf} {
+ testutfnext \xF4\xA0\xA0\xA0\xA0
+} 1
+test utf-6.75.1 {Tcl_UtfNext} {testutfnext fullutf} {
testutfnext \xF4\xA0\xA0\xA0\xA0
} 4
-test utf-6.76 {Tcl_UtfNext} testutfnext {
+test utf-6.76 {Tcl_UtfNext} {testutfnext smallutf} {
+ testutfnext \xF4\xA0\xA0\xA0\xD0
+} 1
+test utf-6.76.1 {Tcl_UtfNext} {testutfnext fullutf} {
testutfnext \xF4\xA0\xA0\xA0\xD0
} 4
-test utf-6.77 {Tcl_UtfNext} testutfnext {
+test utf-6.77 {Tcl_UtfNext} {testutfnext smallutf} {
+ testutfnext \xF4\xA0\xA0\xA0\xE8
+} 1
+test utf-6.77.1 {Tcl_UtfNext} {testutfnext fullutf} {
testutfnext \xF4\xA0\xA0\xA0\xE8
} 4
-test utf-6.78 {Tcl_UtfNext} testutfnext {
+test utf-6.78 {Tcl_UtfNext} {testutfnext smallutf} {
+ testutfnext \xF4\xA0\xA0\xA0\xF4
+} 1
+test utf-6.78.1 {Tcl_UtfNext} {testutfnext fullutf} {
testutfnext \xF4\xA0\xA0\xA0\xF4
} 4
-test utf-6.79 {Tcl_UtfNext} testutfnext {
+test utf-6.79 {Tcl_UtfNext} {testutfnext smallutf} {
+ testutfnext \xF4\xA0\xA0\xA0G\xF8
+} 1
+test utf-6.79.1 {Tcl_UtfNext} {testutfnext fullutf} {
testutfnext \xF4\xA0\xA0\xA0G\xF8
} 4
test utf-6.80 {Tcl_UtfNext - overlong sequences} testutfnext {
@@ -415,7 +437,10 @@ test utf-6.85 {Tcl_UtfNext - overlong sequences} testutfnext {
test utf-6.86 {Tcl_UtfNext - overlong sequences} testutfnext {
testutfnext \xF0\x80\x80\x80
} 1
-test utf-6.87 {Tcl_UtfNext - overlong sequences} {testutfnext} {
+test utf-6.87 {Tcl_UtfNext - overlong sequences} {testutfnext smallutf} {
+ testutfnext \xF0\x90\x80\x80
+} 1
+test utf-6.87 {Tcl_UtfNext - overlong sequences} {testutfnext fullutf} {
testutfnext \xF0\x90\x80\x80
} 4
test utf-6.88 {Tcl_UtfNext, pointing to 2th byte of 3-byte valid sequence} {testutfnext} {
@@ -508,13 +533,22 @@ test utf-7.9.1 {Tcl_UtfPrev} testutfprev {
test utf-7.9.2 {Tcl_UtfPrev} testutfprev {
testutfprev A\xF8\xA0\xF8\xA0 3
} 2
-test utf-7.10 {Tcl_UtfPrev} testutfprev {
+test utf-7.10 {Tcl_UtfPrev} {testutfprev smallutf} {
+ testutfprev A\xF4\xA0
+} 2
+test utf-7.10.1 {Tcl_UtfPrev} {testutfprev smallutf} {
+ testutfprev A\xF4\xA0\xA0\xA0 3
+} 2
+test utf-7.10.2 {Tcl_UtfPrev} {testutfprev smallutf} {
+ testutfprev A\xF4\xA0\xF8\xA0 3
+} 2
+test utf-7.10 {Tcl_UtfPrev} {testutfprev fullutf} {
testutfprev A\xF4\xA0
} 1
-test utf-7.10.1 {Tcl_UtfPrev} testutfprev {
+test utf-7.10.1 {Tcl_UtfPrev} {testutfprev fullutf} {
testutfprev A\xF4\xA0\xA0\xA0 3
} 1
-test utf-7.10.2 {Tcl_UtfPrev} testutfprev {
+test utf-7.10.2 {Tcl_UtfPrev} {testutfprev fullutf} {
testutfprev A\xF4\xA0\xF8\xA0 3
} 1
test utf-7.11 {Tcl_UtfPrev} testutfprev {
@@ -556,13 +590,22 @@ test utf-7.14.1 {Tcl_UtfPrev} testutfprev {
test utf-7.14.2 {Tcl_UtfPrev} testutfprev {
testutfprev A\xF8\xA0\xA0\xF8 4
} 3
-test utf-7.15 {Tcl_UtfPrev} testutfprev {
+test utf-7.15 {Tcl_UtfPrev} {testutfprev smallutf} {
+ testutfprev A\xF4\xA0\xA0
+} 3
+test utf-7.15.1 {Tcl_UtfPrev} {testutfprev smallutf} {
+ testutfprev A\xF4\xA0\xA0\xA0 4
+} 3
+test utf-7.15.2 {Tcl_UtfPrev} {testutfprev smallutf} {
+ testutfprev A\xF4\xA0\xA0\xF8 4
+} 3
+test utf-7.15.3 {Tcl_UtfPrev} {testutfprev fullutf} {
testutfprev A\xF4\xA0\xA0
} 1
-test utf-7.15.1 {Tcl_UtfPrev} testutfprev {
+test utf-7.15.4 {Tcl_UtfPrev} {testutfprev fullutf} {
testutfprev A\xF4\xA0\xA0\xA0 4
} 1
-test utf-7.15.2 {Tcl_UtfPrev} testutfprev {
+test utf-7.15.5 {Tcl_UtfPrev} {testutfprev fullutf} {
testutfprev A\xF4\xA0\xA0\xF8 4
} 1
test utf-7.16 {Tcl_UtfPrev} testutfprev {
@@ -583,28 +626,52 @@ test utf-7.17.1 {Tcl_UtfPrev} testutfprev {
test utf-7.17.2 {Tcl_UtfPrev} testutfprev {
testutfprev A\xD0\xA0\xA0\xF8 4
} 3
-test utf-7.18 {Tcl_UtfPrev} testutfprev {
+test utf-7.18 {Tcl_UtfPrev} {testutfprev smallutf} {
+ testutfprev A\xA0\xA0\xA0
+} 1
+test utf-7.18.1 {Tcl_UtfPrev} {testutfprev smallutf} {
+ testutfprev A\xA0\xA0\xA0\xA0 4
+} 1
+test utf-7.18.2 {Tcl_UtfPrev} {testutfprev smallutf} {
+ testutfprev A\xA0\xA0\xA0\xF8 4
+} 1
+test utf-7.18.3 {Tcl_UtfPrev} {testutfprev fullutf} {
testutfprev A\xA0\xA0\xA0
} 3
-test utf-7.18.1 {Tcl_UtfPrev} testutfprev {
+test utf-7.18.4 {Tcl_UtfPrev} {testutfprev fullutf} {
testutfprev A\xA0\xA0\xA0\xA0 4
} 3
-test utf-7.18.2 {Tcl_UtfPrev} testutfprev {
+test utf-7.18.5 {Tcl_UtfPrev} {testutfprev fullutf} {
testutfprev A\xA0\xA0\xA0\xF8 4
} 3
-test utf-7.19 {Tcl_UtfPrev} testutfprev {
+test utf-7.19 {Tcl_UtfPrev} {testutfprev smallutf} {
+ testutfprev A\xF8\xA0\xA0\xA0
+} 2
+test utf-7.19.1 {Tcl_UtfPrev} {testutfprev fullutf} {
testutfprev A\xF8\xA0\xA0\xA0
} 4
-test utf-7.20 {Tcl_UtfPrev} testutfprev {
+test utf-7.20 {Tcl_UtfPrev} {testutfprev smallutf} {
+ testutfprev A\xF4\xA0\xA0\xA0
+} 2
+test utf-7.20.1 {Tcl_UtfPrev} {testutfprev fullutf} {
testutfprev A\xF4\xA0\xA0\xA0
} 1
-test utf-7.21 {Tcl_UtfPrev} testutfprev {
+test utf-7.21 {Tcl_UtfPrev} {testutfprev smallutf} {
+ testutfprev A\xE8\xA0\xA0\xA0
+} 2
+test utf-7.21.1 {Tcl_UtfPrev} {testutfprev fullutf} {
testutfprev A\xE8\xA0\xA0\xA0
} 4
-test utf-7.22 {Tcl_UtfPrev} testutfprev {
+test utf-7.22 {Tcl_UtfPrev} {testutfprev smallutf} {
+ testutfprev A\xD0\xA0\xA0\xA0
+} 2
+test utf-7.22.1 {Tcl_UtfPrev} {testutfprev fullutf} {
testutfprev A\xD0\xA0\xA0\xA0
} 4
-test utf-7.23 {Tcl_UtfPrev} testutfprev {
+test utf-7.23 {Tcl_UtfPrev} {testutfprev smallutf} {
+ testutfprev A\xA0\xA0\xA0\xA0
+} 2
+test utf-7.23.1 {Tcl_UtfPrev} {testutfprev fullutf} {
testutfprev A\xA0\xA0\xA0\xA0
} 4
test utf-7.24 {Tcl_UtfPrev -- overlong sequence} testutfprev {
@@ -628,7 +695,10 @@ test utf-7.28 {Tcl_UtfPrev -- overlong sequence} testutfprev {
test utf-7.28.1 {Tcl_UtfPrev -- overlong sequence} testutfprev {
testutfprev A\xE0\x80\x80 2
} 1
-test utf-7.29 {Tcl_UtfPrev -- overlong sequence} testutfprev {
+test utf-7.29 {Tcl_UtfPrev -- overlong sequence} {testutfprev smallutf} {
+ testutfprev A\xF0\x80\x80\x80
+} 2
+test utf-7.29.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev fullutf} {
testutfprev A\xF0\x80\x80\x80
} 4
test utf-7.30 {Tcl_UtfPrev -- overlong sequence} testutfprev {
@@ -658,13 +728,22 @@ test utf-7.37 {Tcl_UtfPrev -- overlong sequence} testutfprev {
test utf-7.38 {Tcl_UtfPrev -- overlong sequence} testutfprev {
testutfprev A\xE0\xA0\x80 2
} 1
-test utf-7.39 {Tcl_UtfPrev -- overlong sequence} {testutfprev} {
+test utf-7.39 {Tcl_UtfPrev -- overlong sequence} {testutfprev smallutf} {
+ testutfprev A\xF0\x90\x80\x80
+} 2
+test utf-7.39.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev fullutf} {
testutfprev A\xF0\x90\x80\x80
} 1
-test utf-7.40 {Tcl_UtfPrev -- overlong sequence} {testutfprev} {
+test utf-7.40 {Tcl_UtfPrev -- overlong sequence} {testutfprev smallutf} {
+ testutfprev A\xF0\x90\x80\x80 4
+} 3
+test utf-7.40.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev fullutf} {
testutfprev A\xF0\x90\x80\x80 4
} 1
-test utf-7.41 {Tcl_UtfPrev -- overlong sequence} {testutfprev} {
+test utf-7.41 {Tcl_UtfPrev -- overlong sequence} {testutfprev smallutf} {
+ testutfprev A\xF0\x90\x80\x80 3
+} 2
+test utf-7.41.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev fullutf} {
testutfprev A\xF0\x90\x80\x80 3
} 1
test utf-7.42 {Tcl_UtfPrev -- overlong sequence} testutfprev {
@@ -679,7 +758,10 @@ test utf-7.44 {Tcl_UtfPrev -- no lead byte at start} testutfprev {
test utf-7.45 {Tcl_UtfPrev -- no lead byte at start} testutfprev {
testutfprev \xA0\xA0\xA0
} 2
-test utf-7.46 {Tcl_UtfPrev -- no lead byte at start} testutfprev {
+test utf-7.46 {Tcl_UtfPrev -- no lead byte at start} {testutfprev smallutf} {
+ testutfprev \xA0\xA0\xA0\xA0
+} 1
+test utf-7.46 {Tcl_UtfPrev -- no lead byte at start} {testutfprev fullutf} {
testutfprev \xA0\xA0\xA0\xA0
} 3
test utf-7.47 {Tcl_UtfPrev, pointing to 3th byte of 3-byte valid sequence} {testutfprev} {