summary refs log tree commit diff stats
diff options
context:
space:
mode:
author dgp <dgp@users.sourceforge.net> 2017-06-05 17:15:11 (GMT)
committer dgp <dgp@users.sourceforge.net> 2017-06-05 17:15:11 (GMT)
commit 0b178367ba3b9a10d44f89025c89292a4ea49b20 (patch)
tree 853c1526b11c08616068ce931e63175c687bfd8f
parent 1d537a6b450c04d52d93f2c04bf3135b60711e61 (diff)
parent 67fd012128e6e76e3d745ca090075acbcfa9f258 (diff)
download tcl-0b178367ba3b9a10d44f89025c89292a4ea49b20.zip
tcl-0b178367ba3b9a10d44f89025c89292a4ea49b20.tar.gz
tcl-0b178367ba3b9a10d44f89025c89292a4ea49b20.tar.bz2
[67aa9a2070] Tcl_UtfToUniChar returns single byte for invalid UTF-8 input as documented.
-rw-r--r-- generic/tclUtf.c 12
-rw-r--r-- tests/encoding.test 25
2 files changed, 34 insertions, 3 deletions
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index b33bf6a..eec4068 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -292,7 +292,9 @@ Tcl_UtfToUniChar(
*/
*chPtr = (Tcl_UniChar) (((byte & 0x1F) << 6) | (src[1] & 0x3F));
- return 2;
+ if ((*chPtr == 0) || (*chPtr > 0x7f)) {
+ return 2;
+ }
}
/*
@@ -307,7 +309,9 @@ Tcl_UtfToUniChar(
*chPtr = (Tcl_UniChar) (((byte & 0x0F) << 12)
| ((src[1] & 0x3F) << 6) | (src[2] & 0x3F));
- return 3;
+ if (*chPtr > 0x7ff) {
+ return 3;
+ }
}
/*
@@ -324,7 +328,9 @@ Tcl_UtfToUniChar(
*chPtr = (Tcl_UniChar) (((byte & 0x0E) << 18) | ((src[1] & 0x3F) << 12)
| ((src[2] & 0x3F) << 6) | (src[3] & 0x3F));
- return 4;
+ if ((*chPtr <= 0x10ffff) && (*chPtr > 0xffff)) {
+ return 4;
+ }
}
/*
diff --git a/tests/encoding.test b/tests/encoding.test
index d9ba072..eb2a08f 100644
--- a/tests/encoding.test
+++ b/tests/encoding.test
@@ -448,6 +448,31 @@ test encoding-24.3 {EscapeFreeProc on open channels} {stdio} {
list $count [viewable $line]
} [list 3 "\u4e4e\u4e5e\u4e5f (\\u4e4e\\u4e5e\\u4e5f)"]
+test encoding-24.4 {Parse valid or invalid utf-8} {
+ string length [encoding convertfrom utf-8 "\xc0\x80"]
+} 1
+test encoding-24.5 {Parse valid or invalid utf-8} {
+ string length [encoding convertfrom utf-8 "\xc0\x81"]
+} 2
+test encoding-24.6 {Parse valid or invalid utf-8} {
+ string length [encoding convertfrom utf-8 "\xc1\xbf"]
+} 2
+test encoding-24.7 {Parse valid or invalid utf-8} {
+ string length [encoding convertfrom utf-8 "\xc2\x80"]
+} 1
+test encoding-24.8 {Parse valid or invalid utf-8} {
+ string length [encoding convertfrom utf-8 "\xe0\x80\x80"]
+} 3
+test encoding-24.9 {Parse valid or invalid utf-8} {
+ string length [encoding convertfrom utf-8 "\xe0\x9f\xbf"]
+} 3
+test encoding-24.10 {Parse valid or invalid utf-8} {
+ string length [encoding convertfrom utf-8 "\xe0\xa0\x80"]
+} 1
+test encoding-24.11 {Parse valid or invalid utf-8} {
+ string length [encoding convertfrom utf-8 "\xef\xbf\xbf"]
+} 1
+
file delete [file join [temporaryDirectory] iso2022.txt]
#