summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorjan.nijtmans <nijtmans@users.sourceforge.net>2018-05-01 18:41:27 (GMT)
committerjan.nijtmans <nijtmans@users.sourceforge.net>2018-05-01 18:41:27 (GMT)
commit51208ca53e4fceed8f2bea1005d2f5184ce94699 (patch)
tree5a3ff1af94f7d6964ffecc28a8e059620acb5dcc
parentbfc5a0090ff536e09dc97682d33965b26416e8a2 (diff)
downloadtcl-51208ca53e4fceed8f2bea1005d2f5184ce94699.zip
tcl-51208ca53e4fceed8f2bea1005d2f5184ce94699.tar.gz
tcl-51208ca53e4fceed8f2bea1005d2f5184ce94699.tar.bz2
Implement special "string totitle" for Extended Georgian characters (new behavior in Unicode 11)
-rw-r--r--generic/tclUtf.c12
-rw-r--r--tests/utf.test12
-rw-r--r--tools/uniClass.tcl2
3 files changed, 22 insertions, 4 deletions
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index 319bfa0..1d73a7a 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -1034,7 +1034,10 @@ Tcl_UtfToTitle(
lowChar = (((lowChar & 0x3ff) << 10) | (ch & 0x3ff)) + 0x10000;
}
#endif
- lowChar = Tcl_UniCharToLower(lowChar);
+ /* Special exception for Georgian characters, which don't have titlecase */
+ if ((lowChar < 0x1C90) || (lowChar >= 0x1CC0)) {
+ lowChar = Tcl_UniCharToLower(lowChar);
+ }
if (bytes < TclUtfCount(lowChar)) {
memcpy(dst, src, (size_t) bytes);
@@ -1355,8 +1358,9 @@ Tcl_UniCharToLower(
{
if (!UNICODE_OUT_OF_RANGE(ch)) {
int info = GetUniCharInfo(ch);
+ int mode = GetCaseType(info);
- if (GetCaseType(info) & 0x02) {
+ if ((mode & 0x02) && (mode != 0x7)) {
ch += GetDelta(info);
}
}
@@ -1392,7 +1396,9 @@ Tcl_UniCharToTitle(
* Subtract or add one depending on the original case.
*/
- ch += ((mode & 0x4) ? -1 : 1);
+ if (mode != 0x7) {
+ ch += ((mode & 0x4) ? -1 : 1);
+ }
} else if (mode == 0x4) {
ch -= GetDelta(info);
}
diff --git a/tests/utf.test b/tests/utf.test
index af471e1..39818cc 100644
--- a/tests/utf.test
+++ b/tests/utf.test
@@ -250,6 +250,9 @@ test utf-11.3 {Tcl_UtfToUpper} {
test utf-11.4 {Tcl_UtfToUpper} {
string toupper \u01e3ab
} \u01e2AB
+test utf-11.5 {Tcl_UtfToUpper Georgian (new in Unicode 11)} {
+ string toupper \u10d0\u1c90
+} \u1c90\u1c90
test utf-12.1 {Tcl_UtfToLower} {
string tolower {}
@@ -263,6 +266,9 @@ test utf-12.3 {Tcl_UtfToLower} {
test utf-12.4 {Tcl_UtfToLower} {
string tolower \u01e2AB
} \u01e3ab
+test utf-12.5 {Tcl_UtfToLower Georgian (new in Unicode 11)} {
+ string tolower \u10d0\u1c90
+} \u10d0\u10d0
test utf-13.1 {Tcl_UtfToTitle} {
string totitle {}
@@ -276,6 +282,12 @@ test utf-13.3 {Tcl_UtfToTitle} {
test utf-13.4 {Tcl_UtfToTitle} {
string totitle \u01f3ab
} \u01f2ab
+test utf-13.5 {Tcl_UtfToTitle Georgian (new in Unicode 11)} {
+ string totitle \u10d0\u1c90
+} \u10d0\u1c90
+test utf-13.6 {Tcl_UtfToTitle Georgian (new in Unicode 11)} {
+ string totitle \u1c90\u10d0
+} \u1c90\u10d0
test utf-14.1 {Tcl_UtfNcasecmp} {
string compare -nocase a b
diff --git a/tools/uniClass.tcl b/tools/uniClass.tcl
index 8047894..86ec931 100644
--- a/tools/uniClass.tcl
+++ b/tools/uniClass.tcl
@@ -66,7 +66,7 @@ proc genTable {type} {
for {set i 0} {$i <= 0x10ffff} {incr i} {
if {$i == 0xd800} {
# Skip surrogates
- set i 0xdc00
+ set i 0xe000
}
if {[string is $type [format %c $i]]} {
if {$i == ($last + 1)} {