From 059d5cc1803ee62dea1b7fd7e42e75453ce27264 Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Mon, 20 Apr 2020 20:44:24 +0000 Subject: Add test-cases handling TclGetBytesFromObj() ( actually Tcl_UtfToUniChar too) --- tests/binary.test | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/tests/binary.test b/tests/binary.test index a777b2a..b06afe0 100644 --- a/tests/binary.test +++ b/tests/binary.test @@ -16,6 +16,7 @@ if {[lsearch [namespace children] ::tcltest] == -1} { } testConstraint bigEndian [expr {$tcl_platform(byteOrder) eq "bigEndian"}] testConstraint littleEndian [expr {$tcl_platform(byteOrder) eq "littleEndian"}] +testConstraint testbytestring [llength [info commands testbytestring]] # Big test for correct ordering of data in [expr] proc testIEEE {} { @@ -2941,7 +2942,19 @@ test binary-79.2 {Tcl_SetByteArrayLength} testsetbytearraylength { testsetbytearraylength [string cat \u0141 B C] 1 } A - +test binary-80.1 {TclGetBytesFromObj} -constraints testbytestring -returnCodes 1 -body { + testbytestring "\u4E4E" +} -result "expected byte sequence but character 0 was '\u4E4E' (U+004E4E)" +test binary-80.2 {TclGetBytesFromObj} -constraints testbytestring -returnCodes 1 -body { + testbytestring [testbytestring "\x00\xA0\xA0\xA0\xE4\xB9\x8E"] +} -result "expected byte sequence but character 4 was '\u4E4E' (U+004E4E)" +test binary-80.3 {TclGetBytesFromObj} -constraints testbytestring -returnCodes 1 -body { + testbytestring [testbytestring "\xC0\x80\xA0\xA0\xA0\xE4\xB9\x8E"] +} -result "expected byte sequence but character 4 was '\u4E4E' (U+004E4E)" +test binary-80.4 {TclGetBytesFromObj} -constraints testbytestring -returnCodes 1 -body { + testbytestring [testbytestring "\xC0\x80\xA0\xA0\xA0\xF0\x9F\x98\x81"] +} -result "expected byte sequence but character 4 was '\U01F601' (U+01F601)" + # ---------------------------------------------------------------------- # cleanup -- cgit v0.12 From 206022e9799361a82f91780bace269e514fb27bf Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Mon, 20 Apr 2020 21:54:31 +0000 Subject: Teach Tcl_UtfPrev() that 0xC1 is _always_ an invallid byte. Test-case utf-7.34. Make sure that Tcl_UtfCharComplete(src, TCL_UTF_MAX) always returns 1, for whatever bytes, since that's the maximum number of bytes Tcl_UtfToUniChar() can read in a single call. --- generic/tclUtf.c | 20 ++++++++++++++------ tests/utf.test | 2 +- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 2a04414..c018472 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -64,12 +64,14 @@ static const unsigned char totalBytes[256] = { 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, +/* Tcl_UtfCharComplete() might point to 2nd byte of valid 4-byte sequence */ + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, +/* End of "continuation byte section" */ + 2,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,1,1,1,1,1,1,1,1,1,1,1 }; - + static const unsigned char complete[256] = { 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, @@ -79,8 +81,14 @@ static const unsigned char complete[256] = { 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, /* End of "continuation byte section" */ - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,1,1,1,1,1,1,1,1,1,1,1 + 2,1,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, + 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, +#if TCL_UTF_MAX > 3 + 4,4,4,4,4, +#else + 1,1,1,1,1, +#endif + 1,1,1,1,1,1,1,1,1,1,1 }; /* diff --git a/tests/utf.test b/tests/utf.test index 150e395..a12cc73 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -635,7 +635,7 @@ test utf-7.33 {Tcl_UtfPrev -- overlong sequence} testutfprev { } 1 test utf-7.34 {Tcl_UtfPrev -- overlong sequence} testutfprev { testutfprev A\xC1\x80 -} 1 +} 2 test utf-7.35 {Tcl_UtfPrev -- overlong sequence} testutfprev { testutfprev A\xC2\x80 } 1 -- cgit v0.12