summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: jan.nijtmans <nijtmans@users.sourceforge.net> 2020-04-21 07:18:50 (GMT)
committer: jan.nijtmans <nijtmans@users.sourceforge.net> 2020-04-21 07:18:50 (GMT)
commit: a1d0f9db908841dd9ea1e6732b933cf385fcb459 (patch)
tree: b892bcbec65a3d04947a491c20770b8fb968be68
parent: 942bd1ddd961886f38b16577614a77f473bc1239 (diff)
parent: 206022e9799361a82f91780bace269e514fb27bf (diff)
download: tcl-a1d0f9db908841dd9ea1e6732b933cf385fcb459.zip
tcl-a1d0f9db908841dd9ea1e6732b933cf385fcb459.tar.gz
tcl-a1d0f9db908841dd9ea1e6732b933cf385fcb459.tar.bz2
Merge 8.7
-rw-r--r-- generic/tclUtf.c 20
-rw-r--r-- tests/binary.test 15
-rw-r--r-- tests/utf.test 14
3 files changed, 35 insertions, 14 deletions
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index b4f760f..6908985 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -64,12 +64,14 @@ static const unsigned char totalBytes[256] = {
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
- 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+/* Tcl_UtfCharComplete() might point to 2nd byte of valid 4-byte sequence */
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
+/* End of "continuation byte section" */
+ 2,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,1,1,1,1,1,1,1,1,1,1,1
};
-
+
static const unsigned char complete[256] = {
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
@@ -79,8 +81,14 @@ static const unsigned char complete[256] = {
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
/* End of "continuation byte section" */
- 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,1,1,1,1,1,1,1,1,1,1,1
+ 2,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
+#if TCL_UTF_MAX > 3
+ 4,4,4,4,4,
+#else
+ 1,1,1,1,1,
+#endif
+ 1,1,1,1,1,1,1,1,1,1,1
};
/*
diff --git a/tests/binary.test b/tests/binary.test
index a777b2a..b06afe0 100644
--- a/tests/binary.test
+++ b/tests/binary.test
@@ -16,6 +16,7 @@ if {[lsearch [namespace children] ::tcltest] == -1} {
}
testConstraint bigEndian [expr {$tcl_platform(byteOrder) eq "bigEndian"}]
testConstraint littleEndian [expr {$tcl_platform(byteOrder) eq "littleEndian"}]
+testConstraint testbytestring [llength [info commands testbytestring]]
# Big test for correct ordering of data in [expr]
proc testIEEE {} {
@@ -2941,7 +2942,19 @@ test binary-79.2 {Tcl_SetByteArrayLength} testsetbytearraylength {
testsetbytearraylength [string cat \u0141 B C] 1
} A
-
+test binary-80.1 {TclGetBytesFromObj} -constraints testbytestring -returnCodes 1 -body {
+ testbytestring "\u4E4E"
+} -result "expected byte sequence but character 0 was '\u4E4E' (U+004E4E)"
+test binary-80.2 {TclGetBytesFromObj} -constraints testbytestring -returnCodes 1 -body {
+ testbytestring [testbytestring "\x00\xA0\xA0\xA0\xE4\xB9\x8E"]
+} -result "expected byte sequence but character 4 was '\u4E4E' (U+004E4E)"
+test binary-80.3 {TclGetBytesFromObj} -constraints testbytestring -returnCodes 1 -body {
+ testbytestring [testbytestring "\xC0\x80\xA0\xA0\xA0\xE4\xB9\x8E"]
+} -result "expected byte sequence but character 4 was '\u4E4E' (U+004E4E)"
+test binary-80.4 {TclGetBytesFromObj} -constraints testbytestring -returnCodes 1 -body {
+ testbytestring [testbytestring "\xC0\x80\xA0\xA0\xA0\xF0\x9F\x98\x81"]
+} -result "expected byte sequence but character 4 was '\U01F601' (U+01F601)"
+
# ----------------------------------------------------------------------
# cleanup
diff --git a/tests/utf.test b/tests/utf.test
index f3633bd..9b319f3 100644
--- a/tests/utf.test
+++ b/tests/utf.test
@@ -140,10 +140,10 @@ test utf-4.10 {Tcl_NumUtfChars: #u0000, calc len, overcomplete} {testnumutfchars
testnumutfchars [testbytestring "\x00"] end+1
} {2}
test utf-4.11 {Tcl_NumUtfChars: 3 bytes of 4-byte UTF-8 characater} {testnumutfchars testbytestring} {
- testnumutfchars [testbytestring \xf0\x9f\x92\xa9] end-1
+ testnumutfchars [testbytestring \xF0\x9F\x92\xA9] end-1
} {3}
test utf-4.12 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars testbytestring tip389} {
- testnumutfchars [testbytestring \xf0\x9f\x92\xa9] end
+ testnumutfchars [testbytestring \xF0\x9F\x92\xA9] end
} {2}
test utf-5.1 {Tcl_UtfFindFirst} {testfindfirst testbytestring} {
@@ -246,7 +246,7 @@ test utf-6.30 {Tcl_UtfNext} testutfnext {
testutfnext \xF2
} 1
test utf-6.31 {Tcl_UtfNext} testutfnext {
- testutfnext \xF2A
+ testutfnext \xF2G
} 1
test utf-6.32 {Tcl_UtfNext} testutfnext {
testutfnext \xF2\xA0
@@ -369,7 +369,7 @@ test utf-6.71 {Tcl_UtfNext} testutfnext {
testutfnext \xF2\xA0\xA0\xE8
} 1
test utf-6.71 {Tcl_UtfNext} testutfnext {
- testutfnext \xF2\xA0\xA0\xF4
+ testutfnext \xF2\xA0\xA0\xF2
} 1
test utf-6.73 {Tcl_UtfNext} testutfnext {
testutfnext \xF2\xA0\xA0\xF8
@@ -647,7 +647,7 @@ test utf-7.33 {Tcl_UtfPrev -- overlong sequence} testutfprev {
} 1
test utf-7.34 {Tcl_UtfPrev -- overlong sequence} testutfprev {
testutfprev A\xC1\x80
-} 1
+} 2
test utf-7.35 {Tcl_UtfPrev -- overlong sequence} testutfprev {
testutfprev A\xC2\x80
} 1
@@ -873,8 +873,8 @@ test utf-12.3 {Tcl_UtfToLower} {
string tolower \xC3GH
} \xE3gh
test utf-12.4 {Tcl_UtfToLower} {
- string tolower \u01E2AB
-} \u01E3ab
+ string tolower \u01E2GH
+} \u01E3gh
test utf-12.5 {Tcl_UtfToLower Georgian (new in Unicode 11)} {
string tolower \u10D0\u1C90
} \u10D0\u10D0