diff options
author | dgp <dgp@users.sourceforge.net> | 2020-04-27 12:29:47 (GMT) |
---|---|---|
committer | dgp <dgp@users.sourceforge.net> | 2020-04-27 12:29:47 (GMT) |
commit | 0650564b5d84ab359c0aa60685e55ac76e57cfac (patch) | |
tree | 664188f75274ddd90dbf326d70570c80ea86670c | |
parent | 7a2e5e227c82ec66f8e53328ed4fd4e1e5b923e8 (diff) | |
parent | 60d1d8c7eb1ac57639a5666836625c845fe38f2d (diff) | |
download | tcl-0650564b5d84ab359c0aa60685e55ac76e57cfac.zip tcl-0650564b5d84ab359c0aa60685e55ac76e57cfac.tar.gz tcl-0650564b5d84ab359c0aa60685e55ac76e57cfac.tar.bz2 |
[45ca2338cd] Revise the [string to*] machinery for custom builds.
-rw-r--r-- | generic/tclUtf.c | 72 | ||||
-rw-r--r-- | tests/utf.test | 22 |
2 files changed, 68 insertions, 26 deletions
diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 665607f..0e9561d 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -87,6 +87,9 @@ static const unsigned char totalBytes[256] = { static int UtfCount(int ch); static int Invalid(unsigned char *src); +static int UCS4ToUpper(int ch); +static int UCS4ToLower(int ch); +static int UCS4ToTitle(int ch); /* *--------------------------------------------------------------------------- @@ -1007,7 +1010,7 @@ int Tcl_UtfToUpper( char *str) /* String to convert in place. */ { - Tcl_UniChar ch = 0, upChar; + int ch, upChar; char *src, *dst; int len; @@ -1017,8 +1020,8 @@ Tcl_UtfToUpper( src = dst = str; while (*src) { - len = TclUtfToUniChar(src, &ch); - upChar = Tcl_UniCharToUpper(ch); + len = TclUtfToUCS4(src, &ch); + upChar = UCS4ToUpper(ch); /* * To keep badly formed Utf strings from getting inflated by the @@ -1026,7 +1029,7 @@ Tcl_UtfToUpper( * char to dst if its size is <= the original char. */ - if (len < UtfCount(upChar)) { + if (len < UtfCount(upChar) || ((upChar & 0xF800) == 0xD800)) { memmove(dst, src, len); dst += len; } else { @@ -1060,7 +1063,7 @@ int Tcl_UtfToLower( char *str) /* String to convert in place. */ { - Tcl_UniChar ch = 0, lowChar; + int ch, lowChar; char *src, *dst; int len; @@ -1070,8 +1073,8 @@ Tcl_UtfToLower( src = dst = str; while (*src) { - len = TclUtfToUniChar(src, &ch); - lowChar = Tcl_UniCharToLower(ch); + len = TclUtfToUCS4(src, &ch); + lowChar = UCS4ToLower(ch); /* * To keep badly formed Utf strings from getting inflated by the @@ -1079,7 +1082,7 @@ Tcl_UtfToLower( * char to dst if its size is <= the original char. */ - if (len < UtfCount(lowChar)) { + if (len < UtfCount(lowChar) || ((lowChar & 0xF800) == 0xD800)) { memmove(dst, src, len); dst += len; } else { @@ -1114,7 +1117,7 @@ int Tcl_UtfToTitle( char *str) /* String to convert in place. */ { - Tcl_UniChar ch = 0, titleChar, lowChar; + int ch, titleChar, lowChar; char *src, *dst; int len; @@ -1126,10 +1129,10 @@ Tcl_UtfToTitle( src = dst = str; if (*src) { - len = TclUtfToUniChar(src, &ch); - titleChar = Tcl_UniCharToTitle(ch); + len = TclUtfToUCS4(src, &ch); + titleChar = UCS4ToTitle(ch); - if (len < UtfCount(titleChar)) { + if (len < UtfCount(titleChar) || ((titleChar & 0xF800) == 0xD800)) { memmove(dst, src, len); dst += len; } else { @@ -1138,14 +1141,14 @@ Tcl_UtfToTitle( src += len; } while (*src) { - len = TclUtfToUniChar(src, &ch); + len = TclUtfToUCS4(src, &ch); lowChar = ch; /* Special exception for Georgian Asomtavruli chars, no titlecase. */ if ((unsigned)(lowChar - 0x1C90) >= 0x30) { - lowChar = Tcl_UniCharToLower(lowChar); + lowChar = UCS4ToLower(lowChar); } - if (len < UtfCount(lowChar)) { + if (len < UtfCount(lowChar) || ((lowChar & 0xF800) == 0xD800)) { memmove(dst, src, len); dst += len; } else { @@ -1382,8 +1385,8 @@ TclUtfCasecmp( *---------------------------------------------------------------------- */ -Tcl_UniChar -Tcl_UniCharToUpper( +static int +UCS4ToUpper( int ch) /* Unicode character to convert. */ { int info = GetUniCharInfo(ch); @@ -1391,7 +1394,14 @@ Tcl_UniCharToUpper( if (GetCaseType(info) & 0x04) { ch -= GetDelta(info); } - return (Tcl_UniChar) ch; + return ch; +} + +Tcl_UniChar +Tcl_UniCharToUpper( + int ch) /* Unicode character to convert. */ +{ + return (Tcl_UniChar) UCS4ToUpper(ch); } /* @@ -1410,8 +1420,8 @@ Tcl_UniCharToUpper( *---------------------------------------------------------------------- */ -Tcl_UniChar -Tcl_UniCharToLower( +static int +UCS4ToLower( int ch) /* Unicode character to convert. */ { int info = GetUniCharInfo(ch); @@ -1420,7 +1430,14 @@ Tcl_UniCharToLower( if ((mode & 0x02) && (mode != 0x7)) { ch += GetDelta(info); } - return (Tcl_UniChar) ch; + return ch; +} + +Tcl_UniChar +Tcl_UniCharToLower( + int ch) /* Unicode character to convert. */ +{ + return (Tcl_UniChar) UCS4ToLower(ch); } /* @@ -1439,8 +1456,8 @@ Tcl_UniCharToLower( *---------------------------------------------------------------------- */ -Tcl_UniChar -Tcl_UniCharToTitle( +static int +UCS4ToTitle( int ch) /* Unicode character to convert. */ { int info = GetUniCharInfo(ch); @@ -1457,7 +1474,14 @@ Tcl_UniCharToTitle( } else if (mode == 0x4) { ch -= GetDelta(info); } - return (Tcl_UniChar) ch; + return ch; +} + +Tcl_UniChar +Tcl_UniCharToTitle( + int ch) /* Unicode character to convert. */ +{ + return (Tcl_UniChar) UCS4ToTitle(ch); } /* diff --git a/tests/utf.test b/tests/utf.test index cf0d1bf..6fed971 100644 --- a/tests/utf.test +++ b/tests/utf.test @@ -1111,6 +1111,12 @@ test utf-11.4 {Tcl_UtfToUpper} { test utf-11.5 {Tcl_UtfToUpper Georgian (new in Unicode 11)} { string toupper \u10D0\u1C90 } \u1C90\u1C90 +test utf-11.6 {Tcl_UtfToUpper beyond U+FFFF} {Uesc fullutf} { + string toupper \U10428 +} \U10400 +test utf-11.7 {Tcl_UtfToUpper beyond U+FFFF} {pairsTo4bytes} { + string toupper \uD801\uDC28 +} \uD801\uDC00 test utf-12.1 {Tcl_UtfToLower} { string tolower {} @@ -1127,9 +1133,15 @@ test utf-12.4 {Tcl_UtfToLower} { test utf-12.5 {Tcl_UtfToLower Georgian (new in Unicode 11)} { string tolower \u10D0\u1C90 } \u10D0\u10D0 -test utf-12.6 {Tcl_UtfToUpper low/high surrogate)} ucs2 { +test utf-12.6 {Tcl_UtfToLower low/high surrogate)} { string tolower \uDC24\uD824 } \uDC24\uD824 +test utf-12.7 {Tcl_UtfToLower beyond U+FFFF} {Uesc fullutf} { + string tolower \U10400 +} \U10428 +test utf-12.8 {Tcl_UtfToLower beyond U+FFFF} {pairsTo4bytes} { + string tolower \uD801\uDC00 +} \uD801\uDC28 test utf-13.1 {Tcl_UtfToTitle} { string totitle {} @@ -1149,9 +1161,15 @@ test utf-13.5 {Tcl_UtfToTitle Georgian (new in Unicode 11)} { test utf-13.6 {Tcl_UtfToTitle Georgian (new in Unicode 11)} { string totitle \u1C90\u10D0 } \u1C90\u10D0 -test utf-13.7 {Tcl_UtfToTitle low/high surrogate)} ucs2 { +test utf-13.7 {Tcl_UtfToTitle low/high surrogate)} { string totitle \uDC24\uD824 } \uDC24\uD824 +test utf-13.8 {Tcl_UtfToTitle beyond U+FFFF} {Uesc fullutf} { + string totitle \U10428 +} \U10400 +test utf-13.9 {Tcl_UtfToTitle beyond U+FFFF} {pairsTo4bytes} { + string totitle \uD801\uDC28 +} \uD801\uDC00 test utf-14.1 {Tcl_UtfNcasecmp} { string compare -nocase a b |