summaryrefslogtreecommitdiffstats
path: root/generic/tclUtf.c
diff options
context:
space:
mode:
Diffstat (limited to 'generic/tclUtf.c')
-rw-r--r--generic/tclUtf.c103
1 files changed, 75 insertions, 28 deletions
diff --git a/generic/tclUtf.c b/generic/tclUtf.c
index a72394d..43636b4 100644
--- a/generic/tclUtf.c
+++ b/generic/tclUtf.c
@@ -720,8 +720,7 @@ Tcl_UniCharAtIndex(
{
Tcl_UniChar ch = 0;
- while (index >= 0) {
- index--;
+ while (index-- >= 0) {
src += TclUtfToUniChar(src, &ch);
}
return ch;
@@ -751,8 +750,7 @@ Tcl_UtfAtIndex(
{
Tcl_UniChar ch = 0;
- while (index > 0) {
- index--;
+ while (index-- > 0) {
src += TclUtfToUniChar(src, &ch);
}
return src;
@@ -1066,16 +1064,17 @@ Tcl_UtfNcmp(
cs += TclUtfToUniChar(cs, &ch1);
ct += TclUtfToUniChar(ct, &ch2);
+ if (ch1 != ch2) {
#if TCL_UTF_MAX == 4
- /* map high surrogate characters to values > 0xffff */
- if ((ch1 & 0xFC00) == 0xD800) {
- ch1 += 0x4000;
- }
- if ((ch2 & 0xFC00) == 0xD800) {
- ch2 += 0x4000;
- }
+ /* Surrogates always report higher than non-surrogates */
+ if (((ch1 & 0xFC00) == 0xD800)) {
+ if ((ch2 & 0xFC00) != 0xD800) {
+ return ch1;
+ }
+ } else if ((ch2 & 0xFC00) == 0xD800) {
+ return -ch2;
+ }
#endif
- if (ch1 != ch2) {
return (ch1 - ch2);
}
}
@@ -1116,16 +1115,17 @@ Tcl_UtfNcasecmp(
*/
cs += TclUtfToUniChar(cs, &ch1);
ct += TclUtfToUniChar(ct, &ch2);
+ if (ch1 != ch2) {
#if TCL_UTF_MAX == 4
- /* map high surrogate characters to values > 0xffff */
- if ((ch1 & 0xFC00) == 0xD800) {
- ch1 += 0x4000;
- }
- if ((ch2 & 0xFC00) == 0xD800) {
- ch2 += 0x4000;
- }
+ /* Surrogates always report higher than non-surrogates */
+ if (((ch1 & 0xFC00) == 0xD800)) {
+ if ((ch2 & 0xFC00) != 0xD800) {
+ return ch1;
+ }
+ } else if ((ch2 & 0xFC00) == 0xD800) {
+ return -ch2;
+ }
#endif
- if (ch1 != ch2) {
ch1 = Tcl_UniCharToLower(ch1);
ch2 = Tcl_UniCharToLower(ch2);
if (ch1 != ch2) {
@@ -1135,6 +1135,52 @@ Tcl_UtfNcasecmp(
}
return 0;
}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * Tcl_UtfCmp --
+ *
+ * Compare UTF chars of string cs to string ct case sensitively.
+ * Replacement for strcmp in Tcl core, in places where UTF-8 should
+ * be handled.
+ *
+ * Results:
+ * Return <0 if cs < ct, 0 if cs == ct, or >0 if cs > ct.
+ *
+ * Side effects:
+ * None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+int
+TclUtfCmp(
+ const char *cs, /* UTF string to compare to ct. */
+ const char *ct) /* UTF string cs is compared to. */
+{
+ Tcl_UniChar ch1 = 0, ch2 = 0;
+
+ while (*cs && *ct) {
+ cs += TclUtfToUniChar(cs, &ch1);
+ ct += TclUtfToUniChar(ct, &ch2);
+ if (ch1 != ch2) {
+#if TCL_UTF_MAX == 4
+ /* Surrogates always report higher than non-surrogates */
+ if (((ch1 & 0xFC00) == 0xD800)) {
+ if ((ch2 & 0xFC00) != 0xD800) {
+ return ch1;
+ }
+ } else if ((ch2 & 0xFC00) == 0xD800) {
+ return -ch2;
+ }
+#endif
+ return ch1 - ch2;
+ }
+ }
+ return UCHAR(*cs) - UCHAR(*ct);
+}
+
/*
*----------------------------------------------------------------------
@@ -1164,16 +1210,17 @@ TclUtfCasecmp(
while (*cs && *ct) {
cs += TclUtfToUniChar(cs, &ch1);
ct += TclUtfToUniChar(ct, &ch2);
+ if (ch1 != ch2) {
#if TCL_UTF_MAX == 4
- /* map high surrogate characters to values > 0xffff */
- if ((ch1 & 0xFC00) == 0xD800) {
- ch1 += 0x4000;
- }
- if ((ch2 & 0xFC00) == 0xD800) {
- ch2 += 0x4000;
- }
+ /* Surrogates always report higher than non-surrogates */
+ if (((ch1 & 0xFC00) == 0xD800)) {
+ if ((ch2 & 0xFC00) != 0xD800) {
+ return ch1;
+ }
+ } else if ((ch2 & 0xFC00) == 0xD800) {
+ return -ch2;
+ }
#endif
- if (ch1 != ch2) {
ch1 = Tcl_UniCharToLower(ch1);
ch2 = Tcl_UniCharToLower(ch2);
if (ch1 != ch2) {