summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorjan.nijtmans <nijtmans@users.sourceforge.net>2020-05-10 19:28:08 (GMT)
committerjan.nijtmans <nijtmans@users.sourceforge.net>2020-05-10 19:28:08 (GMT)
commit0733f232745ac3cc9a3bd4913bd5ffb8b58378a5 (patch)
treec2825d5f4f6889b70cc3a789c36b53da74eb2668
parent1102f22c5a663ad68838f182b53a44d159ac090d (diff)
downloadtcl-0733f232745ac3cc9a3bd4913bd5ffb8b58378a5.zip
tcl-0733f232745ac3cc9a3bd4913bd5ffb8b58378a5.tar.gz
tcl-0733f232745ac3cc9a3bd4913bd5ffb8b58378a5.tar.bz2
Tweak Invalid() function: No need for "return 0" twice in the function.
For start bytes F0-F4 when TCL_UTF_MAX=4, Tcl_UtfToUniChar() reads 3 bytes but only advances 1 byte, so Tcl_UtfCharComplete() must make sure 3 bytes are available, not 1. Adapt Tcl_UtfCharComplete() accordingly. No change for TCL_UTF_MAX=3 or TCL_UTF_MAX=6.
-rw-r--r--generic/tclUtf.c39
-rw-r--r--tests/utf.test14
2 files changed, 38 insertions, 15 deletions
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index c0de80a..5e0b2e0 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -81,6 +81,30 @@ static const unsigned char totalBytes[256] = {
1,1,1,1,1,1,1,1,1,1,1
};
+static const unsigned char complete[256] = {
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+#if TCL_UTF_MAX < 4
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+#else /* Tcl_UtfCharComplete() might point to 2nd byte of valid 4-byte sequence */
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
+#endif
+ 2,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
+#if TCL_UTF_MAX > 4
+ 4,4,4,4,4,
+#elif TCL_UTF_MAX < 4
+ 1,1,1,1,1,
+#else
+ 3,3,3,3,3,
+#endif
+ 1,1,1,1,1,1,1,1,1,1,1
+};
+
/*
* Functions used only in this module.
*/
@@ -174,14 +198,13 @@ Invalid(
unsigned char byte = UCHAR(*src);
int index;
- if ((byte & 0xC3) != 0xC0) {
+ if ((byte & 0xC3) == 0xC0) {
/* Only lead bytes 0xC0, 0xE0, 0xF0, 0xF4 need examination */
- return 0;
- }
- index = (byte - 0xC0) >> 1;
- if (UCHAR(src[1]) < bounds[index] || UCHAR(src[1]) > bounds[index+1]) {
- /* Out of bounds - report invalid. */
- return 1;
+ index = (byte - 0xC0) >> 1;
+ if (UCHAR(src[1]) < bounds[index] || UCHAR(src[1]) > bounds[index+1]) {
+ /* Out of bounds - report invalid. */
+ return 1;
+ }
}
return 0;
}
@@ -568,7 +591,7 @@ Tcl_UtfCharComplete(
* a complete UTF-8 character. */
int length) /* Length of above string in bytes. */
{
- return length >= totalBytes[UCHAR(*src)];
+ return length >= complete[UCHAR(*src)];
}
/*
diff --git a/tests/utf.test b/tests/utf.test
index 1a4b157..8745385 100644
--- a/tests/utf.test
+++ b/tests/utf.test
@@ -570,13 +570,13 @@ test utf-6.108 {Tcl_UtfNext, read limits} {testutfnext testbytestring} {
test utf-6.109 {Tcl_UtfNext, read limits} {testutfnext testbytestring} {
testutfnext \u8820[testbytestring \xA0] 3
} 3
-test utf-6.110.0 {Tcl_UtfNext, read limits} {testutfnext testbytestring ucs2_utf16} {
+test utf-6.110.0 {Tcl_UtfNext, read limits} {testutfnext testbytestring ucs2} {
testutfnext [testbytestring \xF2\xA0\xA0\xA0]G 1
} 1
-test utf-6.110.1 {Tcl_UtfNext, read limits} {testutfnext testbytestring ucs4} {
+test utf-6.110.1 {Tcl_UtfNext, read limits} {testutfnext testbytestring fullutf} {
testutfnext [testbytestring \xF2\xA0\xA0\xA0]G 1
} 0
-test utf-6.111.0 {Tcl_UtfNext, read limits} {testutfnext testbytestring ucs2_utf16} {
+test utf-6.111.0 {Tcl_UtfNext, read limits} {testutfnext testbytestring ucs2} {
testutfnext [testbytestring \xF2\xA0\xA0\xA0]G 2
} 1
test utf-6.111.1 {Tcl_UtfNext, read limits} {testutfnext testbytestring ucs4} {
@@ -594,16 +594,16 @@ test utf-6.113.0 {Tcl_UtfNext, read limits} {testutfnext testbytestring ucs2_utf
test utf-6.113.1 {Tcl_UtfNext, read limits} {testutfnext testbytestring ucs4} {
testutfnext [testbytestring \xF2\xA0\xA0\xA0]G 4
} 4
-test utf-6.114.0 {Tcl_UtfNext, read limits} {testutfnext testbytestring ucs2_utf16} {
+test utf-6.114.0 {Tcl_UtfNext, read limits} {testutfnext testbytestring ucs2} {
testutfnext [testbytestring \xF2\xA0\xA0\xA0\xA0] 1
} 1
-test utf-6.114.1 {Tcl_UtfNext, read limits} {testutfnext testbytestring ucs4} {
+test utf-6.114.1 {Tcl_UtfNext, read limits} {testutfnext testbytestring fullutf} {
testutfnext [testbytestring \xF2\xA0\xA0\xA0\xA0] 1
} 0
-test utf-6.115.0 {Tcl_UtfNext, read limits} {testutfnext testbytestring ucs2_utf16} {
+test utf-6.115.0 {Tcl_UtfNext, read limits} {testutfnext testbytestring ucs2} {
testutfnext [testbytestring \xF2\xA0\xA0\xA0\xA0] 2
} 1
-test utf-6.115.1 {Tcl_UtfNext, read limits} {testutfnext testbytestring ucs4} {
+test utf-6.115.1 {Tcl_UtfNext, read limits} {testutfnext testbytestring fullutf} {
testutfnext [testbytestring \xF2\xA0\xA0\xA0\xA0] 2
} 0
test utf-6.116.0 {Tcl_UtfNext, read limits} {testutfnext testbytestring ucs2_utf16} {