From 8c3587a6e899c6fd12fd0563312c4a20c289d8fd Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Mon, 4 May 2020 08:35:44 +0000 Subject: (partial) fix for [9d0cb35bb2]: Various issues with core-8-6-branch, TCL_UTF_MAX=4. (Even though TCL_UTF_MAX=4 is unsupported, it would be nice to make it work.) Marked various test-cases as "knownBug"; those work correctly in core-8-branch (8.7). The fix there could be backported. Low priority. --- generic/tclUtf.c | 34 ++++++++++++++++++---------------- tests/utf.test | 16 ++++++++-------- 2 files changed, 26 insertions(+), 24 deletions(-) diff --git a/generic/tclUtf.c b/generic/tclUtf.c index a2080dd..ab3c577 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -359,8 +359,8 @@ Tcl_UniCharToUtfDString( int Tcl_UtfToUniChar( - register const char *src, /* The UTF-8 string. */ - register Tcl_UniChar *chPtr)/* Filled with the Tcl_UniChar represented by + const char *src, /* The UTF-8 string. */ + Tcl_UniChar *chPtr)/* Filled with the Tcl_UniChar represented by * the UTF-8 string. */ { Tcl_UniChar byte; @@ -580,12 +580,12 @@ Tcl_UtfCharComplete( int Tcl_NumUtfChars( - register const char *src, /* The UTF-8 string to measure. */ + const char *src, /* The UTF-8 string to measure. */ int length) /* The length of the string in bytes, or -1 * for strlen(string). */ { Tcl_UniChar ch = 0; - register int i = 0; + int i = 0; /* * The separate implementations are faster. 
@@ -601,27 +601,29 @@ Tcl_NumUtfChars( } if (i < 0) i = INT_MAX; /* Bug [2738427] */ } else { - register const char *endPtr = src + length - TCL_UTF_MAX; + const char *endPtr = src + length - TCL_UTF_MAX; while (src < endPtr) { +#if TCL_UTF_MAX < 4 if (((unsigned)UCHAR(*src) - 0xF0) < 5) { /* treat F0 - F4 as single character */ ch = 0; src++; - } else { - src += TclUtfToUniChar(src, &ch); - } + } else +#endif + src += TclUtfToUniChar(src, &ch); i++; } endPtr += TCL_UTF_MAX; while ((src < endPtr) && Tcl_UtfCharComplete(src, endPtr - src)) { +#if TCL_UTF_MAX < 4 if (((unsigned)UCHAR(*src) - 0xF0) < 5) { /* treat F0 - F4 as single character */ ch = 0; src++; - } else { - src += TclUtfToUniChar(src, &ch); - } + } else +#endif + src += TclUtfToUniChar(src, &ch); i++; } if (src < endPtr) { @@ -890,8 +892,8 @@ Tcl_UtfPrev( Tcl_UniChar Tcl_UniCharAtIndex( - register const char *src, /* The UTF-8 string to dereference. */ - register int index) /* The position of the desired character. */ + const char *src, /* The UTF-8 string to dereference. */ + int index) /* The position of the desired character. */ { Tcl_UniChar ch = 0; @@ -918,8 +920,8 @@ Tcl_UniCharAtIndex( const char * Tcl_UtfAtIndex( - register const char *src, /* The UTF-8 string. */ - register int index) /* The position of the desired character. */ + const char *src, /* The UTF-8 string. */ + int index) /* The position of the desired character. */ { Tcl_UniChar ch = 0; int len = 0; @@ -1191,7 +1193,7 @@ TclpUtfNcmp2( * fine in the strcmp manner. 
*/ - register int result = 0; + int result = 0; for ( ; numBytes != 0; numBytes--, cs++, ct++) { if (*cs != *ct) { diff --git a/tests/utf.test b/tests/utf.test index c0fed6f..a3c049d 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -499,9 +499,9 @@ test utf-6.92 {Tcl_UtfNext, pointing to 2th byte of 4-byte valid sequence} testu test utf-6.93.0 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} {testutfnext ucs2} { testutfnext \x80\x80\x80 } 1 -test utf-6.93.1 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} {testutfnext fullutf} { +test utf-6.93.1 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} {testutfnext fullutf knownBug} { testutfnext \x80\x80\x80 -} 1 +} 3 test utf-6.94 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} testutfnext { testutfnext \xA0\xA0\xA0\xA0 } 1 @@ -987,10 +987,10 @@ test utf-8.4 {Tcl_UniCharAtIndex: index > 0} { test utf-8.5.0 {Tcl_UniCharAtIndex: high surrogate} ucs2 { string index \uD842 0 } \uD842 -test utf-8.5.1 {Tcl_UniCharAtIndex: high surrogate} ucs4 { +test utf-8.5.1 {Tcl_UniCharAtIndex: high surrogate} {ucs4 knownBug} { string index \uD842 0 } \uD842 -test utf-8.5.2 {Tcl_UniCharAtIndex: high surrogate} tip389 { +test utf-8.5.2 {Tcl_UniCharAtIndex: high surrogate} {tip389 knownBug} { string index \uD842 0 } \uD842 test utf-8.6 {Tcl_UniCharAtIndex: low surrogate} { @@ -1002,7 +1002,7 @@ test utf-8.7.0 {Tcl_UniCharAtIndex: Emoji} ucs2 { test utf-8.7.1 {Tcl_UniCharAtIndex: Emoji} ucs4 { string index \uD83D\uDE00G 0 } \U1F600 -test utf-8.7.2 {Tcl_UniCharAtIndex: Emoji} tip389 { +test utf-8.7.2 {Tcl_UniCharAtIndex: Emoji} {tip389 knownBug} { string index \uD83D\uDE00G 0 } \U1F600 test utf-8.8.0 {Tcl_UniCharAtIndex: Emoji} ucs2 { @@ -1011,7 +1011,7 @@ test utf-8.8.0 {Tcl_UniCharAtIndex: Emoji} ucs2 { test utf-8.8.1 {Tcl_UniCharAtIndex: Emoji} ucs4 { string index \uD83D\uDE00G 1 } G -test utf-8.8.2 {Tcl_UniCharAtIndex: Emoji} tip389 { +test utf-8.8.2 {Tcl_UniCharAtIndex: Emoji} 
{tip389 knownBug} { string index \uD83D\uDE00G 1 } {} test utf-8.9.0 {Tcl_UniCharAtIndex: Emoji} ucs2 { @@ -1029,7 +1029,7 @@ test utf-8.10.0 {Tcl_UniCharAtIndex: Emoji} {Uesc ucs2} { test utf-8.10.1 {Tcl_UniCharAtIndex: Emoji} {Uesc ucs4} { string index \U1F600G 0 } \U1F600 -test utf-8.10.2 {Tcl_UniCharAtIndex: Emoji} {Uesc tip389} { +test utf-8.10.2 {Tcl_UniCharAtIndex: Emoji} {Uesc tip389 knownBug} { string index \U1F600G 0 } \U1F600 test utf-8.11.0 {Tcl_UniCharAtIndex: Emoji} {Uesc ucs2} { @@ -1038,7 +1038,7 @@ test utf-8.11.0 {Tcl_UniCharAtIndex: Emoji} {Uesc ucs2} { test utf-8.11.1 {Tcl_UniCharAtIndex: Emoji} {Uesc ucs4} { string index \U1F600G 1 } G -test utf-8.11.2 {Tcl_UniCharAtIndex: Emoji} {Uesc tip389} { +test utf-8.11.2 {Tcl_UniCharAtIndex: Emoji} {Uesc tip389 knownBug} { string index \U1F600G 1 } {} test utf-8.12.0 {Tcl_UniCharAtIndex: Emoji} {Uesc ucs2} { -- cgit v0.12