From 301f6bcb839b301e1e3d3a92e3713696780c95ca Mon Sep 17 00:00:00 2001 From: "jan.nijtmans" Date: Thu, 28 Dec 2017 18:49:24 +0000 Subject: Fix handling of surrogates (when TCL_UTF_MAX > 3) in Tcl_UtfNcmp()/Tcl_UtfNcasecmp()/TclUtfCasecmp(). Backported from core-8-branch, where this was fixed already. --- generic/tclPkg.c | 6 +++--- generic/tclUtf.c | 57 ++++++++++++++++++++++++++++---------------------------- 2 files changed, 32 insertions(+), 31 deletions(-) diff --git a/generic/tclPkg.c b/generic/tclPkg.c index 4f6faa8..6d826a9 100644 --- a/generic/tclPkg.c +++ b/generic/tclPkg.c @@ -414,7 +414,7 @@ PkgRequireCore( continue; } - + /* Check satisfaction of requirements before considering the current version further. */ if (reqc > 0) { satisfies = SomeRequirementSatisfied(availVersion, reqc, reqv); @@ -424,7 +424,7 @@ PkgRequireCore( continue; } } - + if (bestPtr != NULL) { int res = CompareVersions(availVersion, bestVersion, NULL); @@ -485,7 +485,7 @@ PkgRequireCore( /* * Clean up memorized internal reps, if any. */ - + if (bestVersion != NULL) { ckfree(bestVersion); bestVersion = NULL; diff --git a/generic/tclUtf.c b/generic/tclUtf.c index c60e99e..6255a4e 100644 --- a/generic/tclUtf.c +++ b/generic/tclUtf.c @@ -726,8 +726,7 @@ Tcl_UniCharAtIndex( { Tcl_UniChar ch = 0; - while (index >= 0) { - index--; + while (index-- >= 0) { src += TclUtfToUniChar(src, &ch); } return ch; @@ -757,8 +756,7 @@ Tcl_UtfAtIndex( { Tcl_UniChar ch = 0; - while (index > 0) { - index--; + while (index-- > 0) { src += TclUtfToUniChar(src, &ch); } return src; @@ -1072,16 +1070,17 @@ Tcl_UtfNcmp( cs += TclUtfToUniChar(cs, &ch1); ct += TclUtfToUniChar(ct, &ch2); + if (ch1 != ch2) { #if TCL_UTF_MAX == 4 - /* map high surrogate characters to values > 0xffff */ - if ((ch1 & 0xFC00) == 0xD800) { - ch1 += 0x4000; - } - if ((ch2 & 0xFC00) == 0xD800) { - ch2 += 0x4000; - } + /* Surrogates always report higher than non-surrogates */ + if (((ch1 & 0xFC00) == 0xD800)) { + if ((ch2 & 0xFC00) != 0xD800) { + return ch1; + } + } else if ((ch2 & 0xFC00) == 0xD800) { + return -ch2; + } #endif - if (ch1 != ch2) { return (ch1 - ch2); } } @@ -1122,16 +1121,17 @@ Tcl_UtfNcasecmp( */ cs += TclUtfToUniChar(cs, &ch1); ct += TclUtfToUniChar(ct, &ch2); + if (ch1 != ch2) { #if TCL_UTF_MAX == 4 - /* map high surrogate characters to values > 0xffff */ - if ((ch1 & 0xFC00) == 0xD800) { - ch1 += 0x4000; - } - if ((ch2 & 0xFC00) == 0xD800) { - ch2 += 0x4000; - } + /* Surrogates always report higher than non-surrogates */ + if (((ch1 & 0xFC00) == 0xD800)) { + if ((ch2 & 0xFC00) != 0xD800) { + return ch1; + } + } else if ((ch2 & 0xFC00) == 0xD800) { + return -ch2; + } #endif - if (ch1 != ch2) { ch1 = Tcl_UniCharToLower(ch1); ch2 = Tcl_UniCharToLower(ch2); if (ch1 != ch2) { @@ -1170,16 +1170,17 @@ TclUtfCasecmp( while (*cs && *ct) { cs += TclUtfToUniChar(cs, &ch1); ct += TclUtfToUniChar(ct, &ch2); + if (ch1 != ch2) { #if TCL_UTF_MAX == 4 - /* map high surrogate characters to values > 0xffff */ - if ((ch1 & 0xFC00) == 0xD800) { - ch1 += 0x4000; - } - if ((ch2 & 0xFC00) == 0xD800) { - ch2 += 0x4000; - } + /* Surrogates always report higher than non-surrogates */ + if (((ch1 & 0xFC00) == 0xD800)) { + if ((ch2 & 0xFC00) != 0xD800) { + return ch1; + } + } else if ((ch2 & 0xFC00) == 0xD800) { + return -ch2; + } #endif - if (ch1 != ch2) { ch1 = Tcl_UniCharToLower(ch1); ch2 = Tcl_UniCharToLower(ch2); if (ch1 != ch2) { -- cgit v0.12