diff options
author | jan.nijtmans <nijtmans@users.sourceforge.net> | 2020-05-25 09:32:13 (GMT) |
---|---|---|
committer | jan.nijtmans <nijtmans@users.sourceforge.net> | 2020-05-25 09:32:13 (GMT) |
commit | 53e89dcb06d57874c1fd1aff0bca30a01f351cc0 (patch) | |
tree | 4e258563dbd69db853bfaa850d7a82737c432960 | |
parent | 6de32c896abb44a00ad7368892924e9c9de5db11 (diff) | |
parent | d3eb8cf5e3d7b1535ba73e95dee737c08a36d3bb (diff) | |
download | tcl-53e89dcb06d57874c1fd1aff0bca30a01f351cc0.zip tcl-53e89dcb06d57874c1fd1aff0bca30a01f351cc0.tar.gz tcl-53e89dcb06d57874c1fd1aff0bca30a01f351cc0.tar.bz2 |
Fix compiled "string is <class>" for characters > U+FFFF. Add testcase exposing this bug.
-rw-r--r-- | generic/tclExecute.c | 6 | ||||
-rw-r--r-- | generic/tclInt.h | 2 | ||||
-rw-r--r-- | generic/tclUtf.c | 14 | ||||
-rw-r--r-- | tests/string.test | 2 |
4 files changed, 21 insertions, 3 deletions
diff --git a/generic/tclExecute.c b/generic/tclExecute.c index 5708772..cc366e7 100644 --- a/generic/tclExecute.c +++ b/generic/tclExecute.c @@ -5543,9 +5543,11 @@ TEBCresume( ustring1 = Tcl_GetUnicodeFromObj(valuePtr, &length); match = 1; if (length > 0) { + int ch; end = ustring1 + length; - for (p=ustring1 ; p<end ; p++) { - if (!tclStringClassTable[opnd].comparator(*p)) { + for (p=ustring1 ; p<end ; ) { + p += TclUniCharToUCS4(p, &ch); + if (!tclStringClassTable[opnd].comparator(ch)) { match = 0; break; } diff --git a/generic/tclInt.h b/generic/tclInt.h index f9cc6f3..f7b3cc4 100644 --- a/generic/tclInt.h +++ b/generic/tclInt.h @@ -3252,11 +3252,13 @@ MODULE_SCOPE int TclUtfCasecmp(const char *cs, const char *ct); MODULE_SCOPE int TclUtfCount(int ch); #if TCL_UTF_MAX > 3 # define TclUtfToUCS4 Tcl_UtfToUniChar +# define TclUniCharToUCS4(src, ptr) (*ptr = *(src),1) # define TclUCS4Complete Tcl_UtfCharComplete # define TclChar16Complete(src, length) (((unsigned)((unsigned char)*(src) - 0xF0) < 5) \ ? ((length) >= 3) : Tcl_UtfCharComplete((src), (length))) #else MODULE_SCOPE int TclUtfToUCS4(const char *src, int *ucs4Ptr); + MODULE_SCOPE int TclUniCharToUCS4(const Tcl_UniChar *src, int *ucs4Ptr); # define TclUCS4Complete(src, length) (((unsigned)((unsigned char)*(src) - 0xF0) < 5) \ ? ((length) >= 4) : Tcl_UtfCharComplete((src), (length))) # define TclChar16Complete Tcl_UtfCharComplete diff --git a/generic/tclUtf.c b/generic/tclUtf.c index 2eb959e..11bde5c 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -2621,6 +2621,20 @@ TclUtfToUCS4( /* Make use of the #undef Tcl_UtfToUniChar above, which already handles UCS4. */ return Tcl_UtfToUniChar(src, ucs4Ptr); } + +int +TclUniCharToUCS4( + const Tcl_UniChar *src, /* The Tcl_UniChar string. */ + int *ucs4Ptr) /* Filled with the UCS4 codepoint represented + * by the Tcl_UniChar string. */ +{ + if (((src[0] & 0xFC00) == 0xD800) && ((src[1] & 0xFC00) == 0xDC00)) { + *ucs4Ptr = (((src[0] & 0x3FF) << 10) | (src[01] & 0x3FF)) + 0x10000; + return 2; + } + *ucs4Ptr = src[0]; + return 1; +} #endif /* diff --git a/tests/string.test b/tests/string.test index 12821c0..55989e0 100644 --- a/tests/string.test +++ b/tests/string.test @@ -777,7 +777,7 @@ test string-6.80.$noComp {string is wordchar, true} { run {string is wordchar abc_123} } 1 test string-6.81.$noComp {string is wordchar, unicode true} { - run {string is wordchar abc\xFCab\xDCAB\u5001} + run {string is wordchar abc\xFCab\xDCAB\u5001\U1D7CA} } 1 test string-6.82.$noComp {string is wordchar, false} { list [run {string is wordchar -fail var abcd.ef}] $var |