diff options
| author | jan.nijtmans <nijtmans@users.sourceforge.net> | 2020-04-21 07:18:50 (GMT) |
|---|---|---|
| committer | jan.nijtmans <nijtmans@users.sourceforge.net> | 2020-04-21 07:18:50 (GMT) |
| commit | a1d0f9db908841dd9ea1e6732b933cf385fcb459 (patch) | |
| tree | b892bcbec65a3d04947a491c20770b8fb968be68 | |
| parent | 942bd1ddd961886f38b16577614a77f473bc1239 (diff) | |
| parent | 206022e9799361a82f91780bace269e514fb27bf (diff) | |
| download | tcl-a1d0f9db908841dd9ea1e6732b933cf385fcb459.zip tcl-a1d0f9db908841dd9ea1e6732b933cf385fcb459.tar.gz tcl-a1d0f9db908841dd9ea1e6732b933cf385fcb459.tar.bz2 | |
Merge 8.7
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | generic/tclUtf.c | 20 |
| -rw-r--r-- | tests/binary.test | 15 |
| -rw-r--r-- | tests/utf.test | 14 |
3 files changed, 35 insertions, 14 deletions
diff --git a/generic/tclUtf.c b/generic/tclUtf.c index b4f760f..6908985 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -64,12 +64,14 @@ static const unsigned char totalBytes[256] = { 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, +/* Tcl_UtfCharComplete() might point to 2nd byte of valid 4-byte sequence */ + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, +/* End of "continuation byte section" */ + 2,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,1,1,1,1,1,1,1,1,1,1,1 }; - + static const unsigned char complete[256] = { 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, @@ -79,8 +81,14 @@ static const unsigned char complete[256] = { 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, /* End of "continuation byte section" */ - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,1,1,1,1,1,1,1,1,1,1,1 + 2,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, +#if TCL_UTF_MAX > 3 + 4,4,4,4,4, +#else + 1,1,1,1,1, +#endif + 1,1,1,1,1,1,1,1,1,1,1 }; /* diff --git a/tests/binary.test b/tests/binary.test index a777b2a..b06afe0 100644 --- a/tests/binary.test +++ b/tests/binary.test @@ -16,6 +16,7 @@ if {[lsearch [namespace children] ::tcltest] == -1} { } testConstraint bigEndian [expr {$tcl_platform(byteOrder) eq "bigEndian"}] 
testConstraint littleEndian [expr {$tcl_platform(byteOrder) eq "littleEndian"}] +testConstraint testbytestring [llength [info commands testbytestring]] # Big test for correct ordering of data in [expr] proc testIEEE {} { @@ -2941,7 +2942,19 @@ test binary-79.2 {Tcl_SetByteArrayLength} testsetbytearraylength { testsetbytearraylength [string cat \u0141 B C] 1 } A - +test binary-80.1 {TclGetBytesFromObj} -constraints testbytestring -returnCodes 1 -body { + testbytestring "\u4E4E" +} -result "expected byte sequence but character 0 was '\u4E4E' (U+004E4E)" +test binary-80.2 {TclGetBytesFromObj} -constraints testbytestring -returnCodes 1 -body { + testbytestring [testbytestring "\x00\xA0\xA0\xA0\xE4\xB9\x8E"] +} -result "expected byte sequence but character 4 was '\u4E4E' (U+004E4E)" +test binary-80.3 {TclGetBytesFromObj} -constraints testbytestring -returnCodes 1 -body { + testbytestring [testbytestring "\xC0\x80\xA0\xA0\xA0\xE4\xB9\x8E"] +} -result "expected byte sequence but character 4 was '\u4E4E' (U+004E4E)" +test binary-80.4 {TclGetBytesFromObj} -constraints testbytestring -returnCodes 1 -body { + testbytestring [testbytestring "\xC0\x80\xA0\xA0\xA0\xF0\x9F\x98\x81"] +} -result "expected byte sequence but character 4 was '\U01F601' (U+01F601)" + # ---------------------------------------------------------------------- # cleanup diff --git a/tests/utf.test b/tests/utf.test index f3633bd..9b319f3 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -140,10 +140,10 @@ test utf-4.10 {Tcl_NumUtfChars: #u0000, calc len, overcomplete} {testnumutfchars testnumutfchars [testbytestring "\x00"] end+1 } {2} test utf-4.11 {Tcl_NumUtfChars: 3 bytes of 4-byte UTF-8 characater} {testnumutfchars testbytestring} { - testnumutfchars [testbytestring \xf0\x9f\x92\xa9] end-1 + testnumutfchars [testbytestring \xF0\x9F\x92\xA9] end-1 } {3} test utf-4.12 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars testbytestring tip389} { - testnumutfchars [testbytestring \xf0\x9f\x92\xa9] 
end + testnumutfchars [testbytestring \xF0\x9F\x92\xA9] end } {2} test utf-5.1 {Tcl_UtfFindFirst} {testfindfirst testbytestring} { @@ -246,7 +246,7 @@ test utf-6.30 {Tcl_UtfNext} testutfnext { testutfnext \xF2 } 1 test utf-6.31 {Tcl_UtfNext} testutfnext { - testutfnext \xF2A + testutfnext \xF2G } 1 test utf-6.32 {Tcl_UtfNext} testutfnext { testutfnext \xF2\xA0 @@ -369,7 +369,7 @@ test utf-6.71 {Tcl_UtfNext} testutfnext { testutfnext \xF2\xA0\xA0\xE8 } 1 test utf-6.71 {Tcl_UtfNext} testutfnext { - testutfnext \xF2\xA0\xA0\xF4 + testutfnext \xF2\xA0\xA0\xF2 } 1 test utf-6.73 {Tcl_UtfNext} testutfnext { testutfnext \xF2\xA0\xA0\xF8 @@ -647,7 +647,7 @@ test utf-7.33 {Tcl_UtfPrev -- overlong sequence} testutfprev { } 1 test utf-7.34 {Tcl_UtfPrev -- overlong sequence} testutfprev { testutfprev A\xC1\x80 -} 1 +} 2 test utf-7.35 {Tcl_UtfPrev -- overlong sequence} testutfprev { testutfprev A\xC2\x80 } 1 @@ -873,8 +873,8 @@ test utf-12.3 {Tcl_UtfToLower} { string tolower \xC3GH } \xE3gh test utf-12.4 {Tcl_UtfToLower} { - string tolower \u01E2AB -} \u01E3ab + string tolower \u01E2GH +} \u01E3gh test utf-12.5 {Tcl_UtfToLower Georgian (new in Unicode 11)} { string tolower \u10D0\u1C90 } \u10D0\u10D0 |
