diff options
| -rw-r--r-- | generic/tclEncoding.c | 2 | ||||
| -rw-r--r-- | generic/tclUtf.c | 2 | ||||
| -rw-r--r-- | tests/utf.test | 59 |
3 files changed, 30 insertions, 33 deletions
diff --git a/generic/tclEncoding.c b/generic/tclEncoding.c index 1584de0..5c7aab8 100644 --- a/generic/tclEncoding.c +++ b/generic/tclEncoding.c @@ -2341,7 +2341,7 @@ UtfToUtfProc( *dst++ = 0; *chPtr = 0; /* reset surrogate handling */ src += 2; - } else if (!TclUCS4Complete(src, srcEnd - src)) { + } else if (!Tcl_UtfCharComplete(src, srcEnd - src)) { /* * Always check before using TclUtfToUniChar. Not doing can so * cause it run beyond the end of the buffer! If we happen such an diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 9ffbfba..160e444 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -2360,7 +2360,7 @@ TclUtfToUCS4( len = TclUtfToUniChar(src, &ch); fullchar = ch; -#if TCL_UTF_MAX == 4 +#if TCL_UTF_MAX <= 4 /* 4-byte UTF-8 is supported; decode surrogates */ if ((ch >= 0xD800) && len < 3) { diff --git a/tests/utf.test b/tests/utf.test index 0929801..2bfb9ea 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -496,10 +496,7 @@ test utf-6.91.1 {Tcl_UtfNext, validity check [493dccc2de]} {testutfnext fullutf} test utf-6.92 {Tcl_UtfNext, pointing to 2th byte of 4-byte valid sequence} testutfnext { testutfnext \xA0\xA0\xA0 } 1 -test utf-6.93.0 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} {testutfnext ucs2} { - testutfnext \x80\x80\x80 -} 1 -test utf-6.93.1 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} {testutfnext fullutf} { +test utf-6.93 {Tcl_UtfNext, pointing to 2th byte of 4-byte invalid sequence} testutfnext { testutfnext \x80\x80\x80 } 1 test utf-6.94 {Tcl_UtfNext, pointing to 2th byte of 5-byte invalid sequence} testutfnext { @@ -977,37 +974,37 @@ test utf-8.1 {Tcl_UniCharAtIndex: index = 0} { } a test utf-8.2 {Tcl_UniCharAtIndex: index = 0} { string index \u4E4E\u25A 0 -} "\u4E4E" +} \u4E4E test utf-8.3 {Tcl_UniCharAtIndex: index > 0} { string index abcd 2 } c test utf-8.4 {Tcl_UniCharAtIndex: index > 0} { string index \u4E4E\u25A\xFF\u543 2 -} "\uFF" +} \uFF test utf-8.5.0 {Tcl_UniCharAtIndex: high surrogate} ucs2 { string index \uD842 0 -} "\uD842" +} \uD842 test utf-8.5.1 {Tcl_UniCharAtIndex: high surrogate} ucs4 { string index \uD842 0 -} "\uD842" +} \uD842 test utf-8.5.2 {Tcl_UniCharAtIndex: high surrogate} tip389 { string index \uD842 0 -} "\uD842" +} \uD842 test utf-8.6 {Tcl_UniCharAtIndex: low surrogate} { string index \uDC42 0 -} "\uDC42" +} \uDC42 test utf-8.7.0 {Tcl_UniCharAtIndex: Emoji} ucs2 { string index \uD83D\uDE00G 0 -} "\uD83D" +} \uD83D test utf-8.7.1 {Tcl_UniCharAtIndex: Emoji} ucs4 { string index \uD83D\uDE00G 0 -} "\U1F600" +} \U1F600 test utf-8.7.2 {Tcl_UniCharAtIndex: Emoji} tip389 { string index \uD83D\uDE00G 0 -} "\U1F600" +} \U1F600 test utf-8.8.0 {Tcl_UniCharAtIndex: Emoji} ucs2 { string index \uD83D\uDE00G 1 -} "\uDE00" +} \uDE00 test utf-8.8.1 {Tcl_UniCharAtIndex: Emoji} ucs4 { string index \uD83D\uDE00G 1 } G @@ -1025,13 +1022,13 @@ test utf-8.9.2 {Tcl_UniCharAtIndex: Emoji} tip389 { } G test utf-8.10.0 {Tcl_UniCharAtIndex: Emoji} {Uesc ucs2} { string index \U1F600G 0 -} "\uFFFD" +} \uFFFD test utf-8.10.1 {Tcl_UniCharAtIndex: Emoji} {Uesc ucs4} { string index \U1F600G 0 -} "\U1F600" +} \U1F600 test utf-8.10.2 {Tcl_UniCharAtIndex: Emoji} {Uesc tip389} { string index \U1F600G 0 -} "\U1F600" +} \U1F600 test utf-8.11.0 {Tcl_UniCharAtIndex: Emoji} {Uesc ucs2} { string index \U1F600G 1 } G @@ -1056,22 +1053,22 @@ test utf-9.1 {Tcl_UtfAtIndex: index = 0} { } abc test utf-9.2 {Tcl_UtfAtIndex: index > 0} { string range \u4E4E\u25A\xFF\u543klmnop 1 5 -} "\u25A\xFF\u543kl" +} \u25A\xFF\u543kl test utf-9.3.0 {Tcl_UtfAtIndex: index = 0, Emoji} ucs2 { string range \uD83D\uDE00G 0 0 -} "\uD83D" +} \uD83D test utf-9.3.1 {Tcl_UtfAtIndex: index = 0, Emoji} ucs4 { string range \uD83D\uDE00G 0 0 -} "\U1F600" +} \U1F600 test utf-9.3.2 {Tcl_UtfAtIndex: index = 0, Emoji} tip389 { string range \uD83D\uDE00G 0 0 -} "\U1F600" +} \U1F600 test utf-9.4.0 {Tcl_UtfAtIndex: index > 0, Emoji} ucs2 { string range \uD83D\uDE00G 1 1 -} "\uDE00" +} \uDE00 test utf-9.4.1 {Tcl_UtfAtIndex: index > 0, Emoji} ucs4 { string range \uD83D\uDE00G 1 1 -} "G" +} G test utf-9.4.2 {Tcl_UtfAtIndex: index > 0, Emoji} tip389 { string range \uD83D\uDE00G 1 1 } {} @@ -1086,19 +1083,19 @@ test utf-9.5.2 {Tcl_UtfAtIndex: index > 0, Emoji} tip389 { } G test utf-9.6.0 {Tcl_UtfAtIndex: index = 0, Emoji} {Uesc ucs2} { string range \U1f600G 0 0 -} "\uFFFD" +} \uFFFD test utf-9.6.1 {Tcl_UtfAtIndex: index = 0, Emoji} {Uesc ucs4} { string range \U1f600G 0 0 -} "\U1F600" +} \U1F600 test utf-9.6.2 {Tcl_UtfAtIndex: index = 0, Emoji} {Uesc tip389} { string range \U1f600G 0 0 -} "\U1F600" +} \U1F600 test utf-9.7.0 {Tcl_UtfAtIndex: index > 0, Emoji} {Uesc ucs2} { string range \U1f600G 1 1 } G test utf-9.7.1 {Tcl_UtfAtIndex: index > 0, Emoji} {Uesc ucs4} { string range \U1f600G 1 1 -} "G" +} G test utf-9.7.2 {Tcl_UtfAtIndex: index > 0, Emoji} {Uesc tip389} { string range \U1f600G 1 1 } {} @@ -1182,7 +1179,7 @@ bsCheck \uA 10 bsCheck \340 224 bsCheck \uA1 161 bsCheck \u4E21 20001 -bsCheck \741 225 pre388 ;# == \341 +bsCheck \741 225 pre388 ;# == \341 bsCheck \741 60 !pre388 ;# == \74 1 bsCheck \U 85 bsCheck \Uk 85 @@ -1344,7 +1341,7 @@ test utf-20.2 {[4c591fa487] TclUniCharNcmp/TclUtfNcmp} knownBug { set two [format %c 0x10000] set first [string compare $one $two] string range $one 0 0 - string range $two 0 0 + string range $two 0 0 set second [string compare $one $two] expr {($first == $second) ? "agree" : "disagree"} } agree @@ -1466,9 +1463,9 @@ UniCharCaseCmpTest < a b UniCharCaseCmpTest > b a UniCharCaseCmpTest > B a UniCharCaseCmpTest > aBcB abca -UniCharCaseCmpTest < \uFFFF [format %c 0x10000] ucs4 +UniCharCaseCmpTest < \uFFFF [format %c 0x10000] ucs4 UniCharCaseCmpTest < \uFFFF \U10000 {Uesc ucs4} -UniCharCaseCmpTest > [format %c 0x10000] \uFFFF ucs4 +UniCharCaseCmpTest > [format %c 0x10000] \uFFFF ucs4 UniCharCaseCmpTest > \U10000 \uFFFF {Uesc ucs4} |
