From 783b10ecec013f3f096ca5cc2f56b628db6b2fce Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Mon, 11 May 2020 10:03:46 +0000 Subject: Tweak the Tcl_UtfPrev() implementation for TCL_UTF_MAX=4. This fixes 10 testcases in 4 groups (utf-7.10, utf-7.15, utf-7.40 and utf-7.48), where Tcl_UtfPrev() didn't jump to the beginning of the UTF-8 character, even though there was no limitation which prevented that. So, this is actually a bug-fix for the TIP #389 implementation. --- generic/tclUtf.c | 2 +- tests/utf.test | 62 +++++++++++++++++++++++++------------------------------- 2 files changed, 29 insertions(+), 35 deletions(-) diff --git a/generic/tclUtf.c b/generic/tclUtf.c index a6dfb8a..9596aed 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -857,7 +857,7 @@ Tcl_UtfPrev( * it (the fallback) is correct. */ - || (trailBytesSeen >= totalBytes[byte])) { + || (trailBytesSeen >= complete[byte])) { /* * That is, (1 + trailBytesSeen > needed). * We've examined more bytes than needed to complete diff --git a/tests/utf.test b/tests/utf.test index 1b427c9..309b277 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -173,12 +173,12 @@ test utf-4.11 {Tcl_NumUtfChars: 3 bytes of 4-byte UTF-8 characater} {testnumutfc test utf-4.12.0 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars testbytestring ucs2} { testnumutfchars [testbytestring \xF0\x9F\x92\xA9] end } 4 -test utf-4.12.1 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars testbytestring ucs4} { - testnumutfchars [testbytestring \xF0\x9F\x92\xA9] end -} 1 -test utf-4.12.2 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars testbytestring utf16} { +test utf-4.12.1 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars testbytestring utf16} { testnumutfchars [testbytestring \xF0\x9F\x92\xA9] end } 2 +test utf-4.12.2 {Tcl_NumUtfChars: #4-byte UTF-8 character} {testnumutfchars testbytestring ucs4} { + testnumutfchars [testbytestring \xF0\x9F\x92\xA9] end +} 1 test utf-4.13 {Tcl_NumUtfChars: 
end of string} {testnumutfchars testbytestring} { testnumutfchars foobar[testbytestring \xF2\xC2\xA0] end } 8 @@ -282,25 +282,19 @@ test utf-6.28 {Tcl_UtfNext} {testutfnext testbytestring} { test utf-6.29 {Tcl_UtfNext} {testutfnext testbytestring} { testutfnext [testbytestring \xE8\xF8] } 1 -test utf-6.30.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2} { +test utf-6.30.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2_utf16} { testutfnext [testbytestring \xF2] } 1 -test utf-6.30.1 {Tcl_UtfNext} {testutfnext testbytestring utf16} { - testutfnext [testbytestring \xF2] -} 1 -test utf-6.30.2 {Tcl_UtfNext} {testutfnext testbytestring ucs4} { +test utf-6.30.1 {Tcl_UtfNext} {testutfnext testbytestring ucs4} { testutfnext [testbytestring \xF2] } -1 test utf-6.31 {Tcl_UtfNext} {testutfnext testbytestring} { testutfnext [testbytestring \xF2]G } 1 -test utf-6.32.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2} { - testutfnext [testbytestring \xF2\xA0] -} 1 -test utf-6.32.1 {Tcl_UtfNext} {testutfnext testbytestring utf16} { +test utf-6.32.0 {Tcl_UtfNext} {testutfnext testbytestring ucs2_utf16} { testutfnext [testbytestring \xF2\xA0] } 1 -test utf-6.32.2 {Tcl_UtfNext} {testutfnext testbytestring ucs4} { +test utf-6.32.1 {Tcl_UtfNext} {testutfnext testbytestring ucs4} { testutfnext [testbytestring \xF2\xA0] } -1 test utf-6.33 {Tcl_UtfNext} {testutfnext testbytestring} { @@ -709,22 +703,22 @@ test utf-7.9.1 {Tcl_UtfPrev} {testutfprev testbytestring} { test utf-7.9.2 {Tcl_UtfPrev} {testutfprev testbytestring} { testutfprev A[testbytestring \xF8\xA0\xF8\xA0] 3 } 2 -test utf-7.10.0 {Tcl_UtfPrev} {testutfprev testbytestring ucs2_utf16} { +test utf-7.10.0 {Tcl_UtfPrev} {testutfprev testbytestring ucs2} { testutfprev A[testbytestring \xF2\xA0] } 2 -test utf-7.10.1 {Tcl_UtfPrev} {testutfprev testbytestring ucs4} { +test utf-7.10.1 {Tcl_UtfPrev} {testutfprev testbytestring fullutf} { testutfprev A[testbytestring \xF2\xA0] } 1 -test utf-7.10.2 {Tcl_UtfPrev} {testutfprev testbytestring 
ucs2_utf16} { +test utf-7.10.2 {Tcl_UtfPrev} {testutfprev testbytestring ucs2} { testutfprev A[testbytestring \xF2\xA0\xA0\xA0] 3 } 2 -test utf-7.10.3 {Tcl_UtfPrev} {testutfprev testbytestring ucs4} { +test utf-7.10.3 {Tcl_UtfPrev} {testutfprev testbytestring fullutf} { testutfprev A[testbytestring \xF2\xA0\xA0\xA0] 3 } 1 -test utf-7.10.4 {Tcl_UtfPrev} {testutfprev testbytestring ucs2_utf16} { +test utf-7.10.4 {Tcl_UtfPrev} {testutfprev testbytestring ucs2} { testutfprev A[testbytestring \xF2\xA0\xF8\xA0] 3 } 2 -test utf-7.10.5 {Tcl_UtfPrev} {testutfprev testbytestring ucs4} { +test utf-7.10.5 {Tcl_UtfPrev} {testutfprev testbytestring fullutf} { testutfprev A[testbytestring \xF2\xA0\xF8\xA0] 3 } 1 test utf-7.11 {Tcl_UtfPrev} {testutfprev testbytestring} { @@ -766,22 +760,22 @@ test utf-7.14.1 {Tcl_UtfPrev} {testutfprev testbytestring} { test utf-7.14.2 {Tcl_UtfPrev} {testutfprev testbytestring} { testutfprev A[testbytestring \xF8\xA0\xA0\xF8] 4 } 3 -test utf-7.15.0 {Tcl_UtfPrev} {testutfprev testbytestring ucs2_utf16} { +test utf-7.15.0 {Tcl_UtfPrev} {testutfprev testbytestring ucs2} { testutfprev A[testbytestring \xF2\xA0\xA0] } 3 -test utf-7.15.1 {Tcl_UtfPrev} {testutfprev testbytestring ucs4} { +test utf-7.15.1 {Tcl_UtfPrev} {testutfprev testbytestring fullutf} { testutfprev A[testbytestring \xF2\xA0\xA0] } 1 -test utf-7.15.1.0 {Tcl_UtfPrev} {testutfprev testbytestring ucs2_utf16} { +test utf-7.15.2 {Tcl_UtfPrev} {testutfprev testbytestring ucs2} { testutfprev A[testbytestring \xF2\xA0\xA0\xA0] 4 } 3 -test utf-7.15.1.1 {Tcl_UtfPrev} {testutfprev testbytestring ucs4} { +test utf-7.15.3 {Tcl_UtfPrev} {testutfprev testbytestring fullutf} { testutfprev A[testbytestring \xF2\xA0\xA0\xA0] 4 } 1 -test utf-7.15.2.0 {Tcl_UtfPrev} {testutfprev testbytestring ucs2_utf16} { +test utf-7.15.4 {Tcl_UtfPrev} {testutfprev testbytestring ucs2} { testutfprev A[testbytestring \xF2\xA0\xA0\xF8] 4 } 3 -test utf-7.15.2.1 {Tcl_UtfPrev} {testutfprev testbytestring ucs4} { +test 
utf-7.15.5 {Tcl_UtfPrev} {testutfprev testbytestring fullutf} { testutfprev A[testbytestring \xF2\xA0\xA0\xF8] 4 } 1 test utf-7.16 {Tcl_UtfPrev} testutfprev { @@ -910,16 +904,16 @@ test utf-7.39.0 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring test utf-7.39.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring ucs4} { testutfprev A[testbytestring \xF0\x90\x80\x80] } 1 -test utf-7.40.0 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring ucs2_utf16} { +test utf-7.40.0 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring ucs2} { testutfprev A[testbytestring \xF0\x90\x80\x80] 4 } 3 -test utf-7.40.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring ucs4} { +test utf-7.40.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring fullutf} { testutfprev A[testbytestring \xF0\x90\x80\x80] 4 } 1 -test utf-7.41.0 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring ucs2_utf16} { +test utf-7.41.0 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring ucs2} { testutfprev A[testbytestring \xF0\x90\x80\x80] 3 } 2 -test utf-7.41.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring ucs4} { +test utf-7.41.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring fullutf} { testutfprev A[testbytestring \xF0\x90\x80\x80] 3 } 1 test utf-7.42 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} { @@ -955,16 +949,16 @@ test utf-7.48.0 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbyte test utf-7.48.1 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring ucs4} { testutfprev A[testbytestring \xF4\x8F\xBF\xBF] } 1 -test utf-7.48.2 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring ucs2_utf16} { +test utf-7.48.2 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring ucs2} { testutfprev A[testbytestring \xF4\x8F\xBF\xBF] 4 } 3 -test utf-7.48.3 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring 
ucs4} { +test utf-7.48.3 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring fullutf} { testutfprev A[testbytestring \xF4\x8F\xBF\xBF] 4 } 1 -test utf-7.48.4 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring ucs2_utf16} { +test utf-7.48.4 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring ucs2} { testutfprev A[testbytestring \xF4\x8F\xBF\xBF] 3 } 2 -test utf-7.48.5 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring ucs4} { +test utf-7.48.5 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring fullutf} { testutfprev A[testbytestring \xF4\x8F\xBF\xBF] 3 } 1 test utf-7.48.6 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring} { -- cgit v0.12