summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--generic/tclExecute.c6
-rw-r--r--generic/tclInt.h2
-rw-r--r--generic/tclUtf.c14
-rw-r--r--tests/string.test2
4 files changed, 21 insertions, 3 deletions
diff --git a/generic/tclExecute.c b/generic/tclExecute.c
index 44ff2b1..4335551 100644
--- a/generic/tclExecute.c
+++ b/generic/tclExecute.c
@@ -5333,9 +5333,11 @@ TEBCresume(
ustring1 = TclGetUnicodeFromObj(valuePtr, &slength);
match = 1;
if (slength > 0) {
+ int ch;
end = ustring1 + slength;
- for (p=ustring1 ; p<end ; p++) {
- if (!tclStringClassTable[opnd].comparator(*p)) {
+ for (p=ustring1 ; p<end ; ) {
+ p += TclUniCharToUCS4(p, &ch);
+ if (!tclStringClassTable[opnd].comparator(ch)) {
match = 0;
break;
}
diff --git a/generic/tclInt.h b/generic/tclInt.h
index d6d6d0e..f0aa5ea 100644
--- a/generic/tclInt.h
+++ b/generic/tclInt.h
@@ -3183,11 +3183,13 @@ MODULE_SCOPE int TclUtfCasecmp(const char *cs, const char *ct);
MODULE_SCOPE size_t TclUtfCount(int ch);
#if TCL_UTF_MAX > 3
# define TclUtfToUCS4 Tcl_UtfToUniChar
+# define TclUniCharToUCS4(src, ptr) (*ptr = *(src),1)
# define TclUCS4Complete Tcl_UtfCharComplete
# define TclChar16Complete(src, length) (((unsigned)((unsigned char)*(src) - 0xF0) < 5) \
? ((length) >= 3) : Tcl_UtfCharComplete((src), (length)))
#else
MODULE_SCOPE int TclUtfToUCS4(const char *src, int *ucs4Ptr);
+ MODULE_SCOPE int TclUniCharToUCS4(const Tcl_UniChar *src, int *ucs4Ptr);
# define TclUCS4Complete(src, length) (((unsigned)((unsigned char)*(src) - 0xF0) < 5) \
? ((length) >= 4) : Tcl_UtfCharComplete((src), (length)))
# define TclChar16Complete Tcl_UtfCharComplete
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index 3de767d..ac76309 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -2628,6 +2628,20 @@ TclUtfToUCS4(
/* Make use of the #undef Tcl_UtfToUniChar above, which already handles UCS4. */
return Tcl_UtfToUniChar(src, ucs4Ptr);
}
+
+int
+TclUniCharToUCS4(
+ const Tcl_UniChar *src, /* The Tcl_UniChar string. */
+ int *ucs4Ptr) /* Filled with the UCS4 codepoint represented
+ * by the Tcl_UniChar string. */
+{
+ if (((src[0] & 0xFC00) == 0xD800) && ((src[1] & 0xFC00) == 0xDC00)) {
+ *ucs4Ptr = (((src[0] & 0x3FF) << 10) | (src[01] & 0x3FF)) + 0x10000;
+ return 2;
+ }
+ *ucs4Ptr = src[0];
+ return 1;
+}
#endif
/*
diff --git a/tests/string.test b/tests/string.test
index ee27694..5d0a380 100644
--- a/tests/string.test
+++ b/tests/string.test
@@ -777,7 +777,7 @@ test string-6.80.$noComp {string is wordchar, true} {
run {string is wordchar abc_123}
} 1
test string-6.81.$noComp {string is wordchar, unicode true} {
- run {string is wordchar abc\xFCab\xDCAB\u5001}
+ run {string is wordchar abc\xFCab\xDCAB\u5001\U1D7CA}
} 1
test string-6.82.$noComp {string is wordchar, false} {
list [run {string is wordchar -fail var abcd.ef}] $var