From a2f768dce66fc3f48b9a095fdc121fc9cc20b23a Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Tue, 5 May 2020 11:44:32 +0000 Subject: Fix Tcl_UtfPrev() such that it can never go back more than TCL_UTF_MAX bytes. Already done correctly on core-8-6-branch, but this was never forwarded to core-8-branch. --- generic/tclUtf.c | 10 +++++++--- tests/utf.test | 26 +++++++++++++------------- 2 files changed, 20 insertions(+), 16 deletions(-) diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 782657a..7b6ec63 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -815,13 +815,13 @@ Tcl_NumUtfChars( } if (i < 0) i = INT_MAX; /* Bug [2738427] */ } else { - const char *endPtr = src + length - 4; + const char *endPtr = src + length - TCL_UTF_MAX; while (src < endPtr) { src += TclUtfToUniChar(src, &ch); i++; } - endPtr += 4; + endPtr += TCL_UTF_MAX; while ((src < endPtr) && Tcl_UtfCharComplete(src, endPtr - src)) { src += TclUtfToUniChar(src, &ch); i++; @@ -1065,7 +1065,7 @@ Tcl_UtfPrev( /* Continue the search backwards... */ look--; - } while (trailBytesSeen < 4); + } while (trailBytesSeen < TCL_UTF_MAX); /* * We've seen TCL_UTF_MAX trail bytes, so we know there will not be a @@ -1073,7 +1073,11 @@ Tcl_UtfPrev( * accepting the fallback (for TCL_UTF_MAX > 3) or just go back as * far as we can. */ +#if TCL_UTF_MAX > 3 return fallback; +#else + return src - TCL_UTF_MAX; +#endif } /* diff --git a/tests/utf.test b/tests/utf.test index 7079bbf..ae50b0e 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -661,49 +661,49 @@ test utf-7.17.2 {Tcl_UtfPrev} {testutfprev testbytestring} { } 3 test utf-7.18.0 {Tcl_UtfPrev} {testutfprev testbytestring utf16} { testutfprev A[testbytestring \xA0\xA0\xA0] -} 3 +} 1 test utf-7.18.1 {Tcl_UtfPrev} {testutfprev testbytestring ucs4} { testutfprev A[testbytestring \xA0\xA0\xA0] } 3 test utf-7.18.2 {Tcl_UtfPrev} {testutfprev testbytestring utf16} { testutfprev A[testbytestring \xA0\xA0\xA0\xA0] 4 -} 3 +} 1 test utf-7.18.3 {Tcl_UtfPrev} {testutfprev testbytestring ucs4} { testutfprev A[testbytestring \xA0\xA0\xA0\xA0] 4 } 3 test utf-7.18.4 {Tcl_UtfPrev} {testutfprev testbytestring utf16} { testutfprev A[testbytestring \xA0\xA0\xA0\xF8] 4 -} 3 +} 1 test utf-7.18.5 {Tcl_UtfPrev} {testutfprev testbytestring ucs4} { testutfprev A[testbytestring \xA0\xA0\xA0\xF8] 4 } 3 test utf-7.19.0 {Tcl_UtfPrev} {testutfprev testbytestring utf16} { testutfprev A[testbytestring \xF8\xA0\xA0\xA0] -} 4 +} 2 test utf-7.19.1 {Tcl_UtfPrev} {testutfprev testbytestring ucs4} { testutfprev A[testbytestring \xF8\xA0\xA0\xA0] } 4 test utf-7.20.0 {Tcl_UtfPrev} {testutfprev testbytestring utf16} { testutfprev A[testbytestring \xF2\xA0\xA0\xA0] -} 1 +} 2 test utf-7.20.1 {Tcl_UtfPrev} {testutfprev testbytestring ucs4} { testutfprev A[testbytestring \xF2\xA0\xA0\xA0] } 1 test utf-7.21.0 {Tcl_UtfPrev} {testutfprev testbytestring utf16} { testutfprev A\u8820[testbytestring \xA0] -} 4 +} 2 test utf-7.21.1 {Tcl_UtfPrev} {testutfprev testbytestring ucs4} { testutfprev A\u8820[testbytestring \xA0] } 4 test utf-7.22.0 {Tcl_UtfPrev} {testutfprev testbytestring utf16} { testutfprev A[testbytestring \xD0\xA0\xA0\xA0] -} 4 +} 2 test utf-7.22.1 {Tcl_UtfPrev} {testutfprev testbytestring ucs4} { testutfprev A[testbytestring \xD0\xA0\xA0\xA0] } 4 test utf-7.23.0 {Tcl_UtfPrev} {testutfprev testbytestring utf16} { testutfprev A[testbytestring \xA0\xA0\xA0\xA0] -} 4 +} 2 test utf-7.23.1 {Tcl_UtfPrev} {testutfprev testbytestring ucs4} { testutfprev A[testbytestring \xA0\xA0\xA0\xA0] } 4 @@ -730,7 +730,7 @@ test utf-7.28.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} } 1 test utf-7.29.0 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring utf16} { testutfprev A[testbytestring \xF0\x80\x80\x80] -} 4 +} 2 test utf-7.29.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring ucs4} { testutfprev A[testbytestring \xF0\x80\x80\x80] } 4 @@ -763,7 +763,7 @@ test utf-7.38 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring} { } 1 test utf-7.39.0 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring utf16} { testutfprev A[testbytestring \xF0\x90\x80\x80] -} 1 +} 2 test utf-7.39.1 {Tcl_UtfPrev -- overlong sequence} {testutfprev testbytestring ucs4} { testutfprev A[testbytestring \xF0\x90\x80\x80] } 1 @@ -793,7 +793,7 @@ test utf-7.45 {Tcl_UtfPrev -- no lead byte at start} {testutfprev testbytestrin } 2 test utf-7.46.0 {Tcl_UtfPrev -- no lead byte at start} {testutfprev testbytestring utf16} { testutfprev [testbytestring \xA0\xA0\xA0\xA0] -} 3 +} 1 test utf-7.46.1 {Tcl_UtfPrev -- no lead byte at start} {testutfprev testbytestring ucs4} { testutfprev [testbytestring \xA0\xA0\xA0\xA0] } 3 @@ -808,7 +808,7 @@ test utf-7.47.2 {Tcl_UtfPrev, pointing to 3th byte of 3-byte invalid sequence} { } 0 test utf-7.48.0 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring utf16} { testutfprev A\uDBFF\uDFFF -} 1 +} 2 test utf-7.48.1 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring ucs4} { testutfprev A\U10FFFF } 1 @@ -829,7 +829,7 @@ test utf-7.48.6 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbyte } 1 test utf-7.49.0 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring utf16} { testutfprev A[testbytestring \xF4\x90\x80\x80] -} 4 +} 2 test utf-7.49.1 {Tcl_UtfPrev, validity check [493dccc2de]} {testutfprev testbytestring ucs4} { testutfprev A[testbytestring \xF4\x90\x80\x80] } 4 -- cgit v0.12