From fb0ed853e7c49ff24e17f4cb633876d0780b64b5 Mon Sep 17 00:00:00 2001
From: sebres <sebres@users.sourceforge.net>
Date: Tue, 10 Jan 2017 22:38:22 +0000
Subject: lowercase on demand: string index tree can now search in any case;
 clock scan now accounts for UTF-8 char length of words when parsing the format

---
 generic/tclClockFmt.c   | 17 +++++++----------
 generic/tclStrIdxTree.c | 20 ++++++++++----------
 generic/tclStrIdxTree.h | 35 +++++++++++++++++++++++++++++++++++
 3 files changed, 52 insertions(+), 20 deletions(-)

diff --git a/generic/tclClockFmt.c b/generic/tclClockFmt.c
index e66c525..92040d8 100644
--- a/generic/tclClockFmt.c
+++ b/generic/tclClockFmt.c
@@ -1245,7 +1245,7 @@ ClockGetOrParseScanFormat(
 
 	fss->scnTok = tok = ckalloc(sizeof(*tok) * fss->scnTokC);
 	memset(tok, 0, sizeof(*(tok)));
-	for (p = strFmt; p != e; p++) {
+	for (p = strFmt; p < e;) {
 	    switch (*p) {
 	    case '%':
 	    if (1) {
@@ -1265,6 +1265,7 @@ ClockGetOrParseScanFormat(
 		    tok->tokWord.start = p;
 		    tok->tokWord.end = p+1;
 		    AllocTokenInChain(tok, fss->scnTok, fss->scnTokC);
+		    p++;
 		    continue;
 		break;
 		case 'E':
@@ -1315,6 +1316,8 @@ ClockGetOrParseScanFormat(
 		}
 		/* next token */
 		AllocTokenInChain(tok, fss->scnTok, fss->scnTokC);
+		p++;
+		continue;
 	    }
 	    break;
 	    case ' ':
@@ -1325,6 +1328,8 @@ ClockGetOrParseScanFormat(
 		}
 		tok->map = &ScnSpecTokenMap[cp - ScnSpecTokenMapIndex];
 		AllocTokenInChain(tok, fss->scnTok, fss->scnTokC);
+		p++;
+		continue;
 	    break;
 	    default:
 word_tok:
@@ -1339,12 +1344,11 @@ word_tok:
 		    wordTok->map = &ScnWordTokenMap;
 		    AllocTokenInChain(tok, fss->scnTok, fss->scnTokC);
 		}
-		continue;
 	    }
 	    break;
 	    }
 
-	    continue;
+	    p = TclUtfNext(p);
 	}
 
 	/* calculate end distance value for each tokens */
@@ -1468,11 +1472,6 @@ ClockScan(
 
     yyMeridian = MER24;
 
-    /* lower case given string into new object */
-    strObj = Tcl_NewStringObj(TclGetString(strObj), strObj->length);
-    Tcl_IncrRefCount(strObj);
-    strObj->length = Tcl_UtfToLower(TclGetString(strObj));
-
     p = TclGetString(strObj);
     end = p + strObj->length;
     /* in strict mode - bypass spaces at begin / end only (not between tokens) */
@@ -1726,8 +1725,6 @@ not_match:
 
 done:
 
-    Tcl_DecrRefCount(strObj);
-
     return ret;
 }
 
diff --git a/generic/tclStrIdxTree.c b/generic/tclStrIdxTree.c
index f078c7a..afb53e5 100644
--- a/generic/tclStrIdxTree.c
+++ b/generic/tclStrIdxTree.c
@@ -84,7 +84,7 @@ TclStrIdxTreeSearch(
 {
     TclStrIdxTree *parent = tree, *prevParent = tree;
     TclStrIdx  *item = tree->firstPtr, *prevItem = NULL;
-    const char *s = start, *e, *cin, *preve;
+    const char *s = start, *f, *cin, *cinf, *prevf;
     int offs = 0;
 
     if (item == NULL) {
@@ -94,23 +94,23 @@ TclStrIdxTreeSearch(
     /* search in tree */
     do {
 	cin = TclGetString(item->key) + offs;
-	e = TclUtfFindEqual(s, end, cin, cin + item->length);
+	f = TclUtfFindEqualNCInLwr(s, end, cin, cin + item->length, &cinf);
 	/* if something was found */
-	if (e > s) {
+	if (f > s) {
 	    /* if whole string was found */
-	    if (e >= end) {
-		start = e;
+	    if (f >= end) {
+		start = f;
 		goto done;
 	    };
 	    /* set new offset and shift start string */
-	    offs += (e - s);
-	    s = e;
+	    offs += cinf - cin;
+	    s = f;
 	    /* if match item, go deeper as long as possible */
 	    if (offs >= item->length && item->childTree.firstPtr) {
 		/* save previuosly found item (if not ambigous) for 
 		 * possible fallback (few greedy match) */
 		if (item->value != -1) {
-		    preve = e;
+		    prevf = f;
 		    prevItem = item;
 		    prevParent = parent;
 		}
@@ -119,7 +119,7 @@ TclStrIdxTreeSearch(
 		continue;
 	    }
 	    /* no children - return this item and current chars found */
-	    start = e;
+	    start = f;
 	    goto done;
 	}
 
@@ -131,7 +131,7 @@ TclStrIdxTreeSearch(
     if (prevItem != NULL) {
 	item = prevItem;
 	parent = prevParent;
-	start = preve;
+	start = prevf;
     }
 
 done:
diff --git a/generic/tclStrIdxTree.h b/generic/tclStrIdxTree.h
index e80d3db..d2d6f0b 100644
--- a/generic/tclStrIdxTree.h
+++ b/generic/tclStrIdxTree.h
@@ -89,6 +89,41 @@ TclUtfFindEqualNC(
     return ret;
 }
 
+/* Match cs (any case) against lowercase cin; returns end of matched part of cs. */
+inline const char *
+TclUtfFindEqualNCInLwr(
+    register const char *cs,	/* UTF string (in anycase) to find in cin. */
+    register const char *cse,	/* End of cs */
+    register const char *cin,	/* UTF string (in lowercase) will be browsed. */
+    register const char *cine,	/* End of cin */
+    const char	     **cinfnd)	/* Return position in cin */
+{
+    register const char *ret = cs;
+    Tcl_UniChar ch1, ch2;
+    *cinfnd = cin;			/* defined even when nothing matches */
+    while (cs < cse && cin < cine) {	/* test bounds before decoding */
+	cs += TclUtfToUniChar(cs, &ch1);
+	cin += TclUtfToUniChar(cin, &ch2);
+	if (ch1 != ch2 && (Tcl_UniChar) Tcl_UniCharToLower(ch1) != ch2) break; /* fold cs char on demand */
+	ret = cs;
+	*cinfnd = cin;
+    }
+    return ret;
+}
+
+/* Return the location just past the UTF-8 character that starts at src. */
+inline char *
+TclUtfNext(
+    register const char *src)	/* The current location in the string. */
+{
+    Tcl_UniChar ch;
+    if (((unsigned char) *(src)) < 0xC0) {
+	return (char *) (src + 1);	/* byte below 0xC0 is not a multi-byte lead */
+    }
+    return (char *) (src + TclUtfToUniChar(src, &ch)); /* cast drops const explicitly */
+}
+
+
 /*
  * Primitives to safe set, reset and free references.
  */
-- 
cgit v0.12