summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorjan.nijtmans <nijtmans@users.sourceforge.net>2020-05-25 09:32:13 (GMT)
committerjan.nijtmans <nijtmans@users.sourceforge.net>2020-05-25 09:32:13 (GMT)
commit53e89dcb06d57874c1fd1aff0bca30a01f351cc0 (patch)
tree4e258563dbd69db853bfaa850d7a82737c432960
parent6de32c896abb44a00ad7368892924e9c9de5db11 (diff)
parentd3eb8cf5e3d7b1535ba73e95dee737c08a36d3bb (diff)
downloadtcl-53e89dcb06d57874c1fd1aff0bca30a01f351cc0.zip
tcl-53e89dcb06d57874c1fd1aff0bca30a01f351cc0.tar.gz
tcl-53e89dcb06d57874c1fd1aff0bca30a01f351cc0.tar.bz2
Fix compiled "string is <class>" for characters > U+FFFF. Add testcase exposing this bug.
-rw-r--r--generic/tclExecute.c6
-rw-r--r--generic/tclInt.h2
-rw-r--r--generic/tclUtf.c14
-rw-r--r--tests/string.test2
4 files changed, 21 insertions, 3 deletions
diff --git a/generic/tclExecute.c b/generic/tclExecute.c
index 5708772..cc366e7 100644
--- a/generic/tclExecute.c
+++ b/generic/tclExecute.c
@@ -5543,9 +5543,11 @@ TEBCresume(
ustring1 = Tcl_GetUnicodeFromObj(valuePtr, &length);
match = 1;
if (length > 0) {
+ int ch;
end = ustring1 + length;
- for (p=ustring1 ; p<end ; p++) {
- if (!tclStringClassTable[opnd].comparator(*p)) {
+ for (p=ustring1 ; p<end ; ) {
+ p += TclUniCharToUCS4(p, &ch);
+ if (!tclStringClassTable[opnd].comparator(ch)) {
match = 0;
break;
}
diff --git a/generic/tclInt.h b/generic/tclInt.h
index f9cc6f3..f7b3cc4 100644
--- a/generic/tclInt.h
+++ b/generic/tclInt.h
@@ -3252,11 +3252,13 @@ MODULE_SCOPE int TclUtfCasecmp(const char *cs, const char *ct);
MODULE_SCOPE int TclUtfCount(int ch);
#if TCL_UTF_MAX > 3
# define TclUtfToUCS4 Tcl_UtfToUniChar
+# define TclUniCharToUCS4(src, ptr) (*ptr = *(src),1)
# define TclUCS4Complete Tcl_UtfCharComplete
# define TclChar16Complete(src, length) (((unsigned)((unsigned char)*(src) - 0xF0) < 5) \
? ((length) >= 3) : Tcl_UtfCharComplete((src), (length)))
#else
MODULE_SCOPE int TclUtfToUCS4(const char *src, int *ucs4Ptr);
+ MODULE_SCOPE int TclUniCharToUCS4(const Tcl_UniChar *src, int *ucs4Ptr);
# define TclUCS4Complete(src, length) (((unsigned)((unsigned char)*(src) - 0xF0) < 5) \
? ((length) >= 4) : Tcl_UtfCharComplete((src), (length)))
# define TclChar16Complete Tcl_UtfCharComplete
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index 2eb959e..11bde5c 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -2621,6 +2621,20 @@ TclUtfToUCS4(
/* Make use of the #undef Tcl_UtfToUniChar above, which already handles UCS4. */
return Tcl_UtfToUniChar(src, ucs4Ptr);
}
+
+int
+TclUniCharToUCS4(
+ const Tcl_UniChar *src, /* The Tcl_UniChar string. */
+ int *ucs4Ptr) /* Filled with the UCS4 codepoint represented
+ * by the Tcl_UniChar string. */
+{
+ if (((src[0] & 0xFC00) == 0xD800) && ((src[1] & 0xFC00) == 0xDC00)) {
+ *ucs4Ptr = (((src[0] & 0x3FF) << 10) | (src[01] & 0x3FF)) + 0x10000;
+ return 2;
+ }
+ *ucs4Ptr = src[0];
+ return 1;
+}
#endif
/*
diff --git a/tests/string.test b/tests/string.test
index 12821c0..55989e0 100644
--- a/tests/string.test
+++ b/tests/string.test
@@ -777,7 +777,7 @@ test string-6.80.$noComp {string is wordchar, true} {
run {string is wordchar abc_123}
} 1
test string-6.81.$noComp {string is wordchar, unicode true} {
- run {string is wordchar abc\xFCab\xDCAB\u5001}
+ run {string is wordchar abc\xFCab\xDCAB\u5001\U1D7CA}
} 1
test string-6.82.$noComp {string is wordchar, false} {
list [run {string is wordchar -fail var abcd.ef}] $var