summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorsebres <sebres@users.sourceforge.net>2017-01-10 22:38:22 (GMT)
committersebres <sebres@users.sourceforge.net>2017-01-10 22:38:22 (GMT)
commitfb0ed853e7c49ff24e17f4cb633876d0780b64b5 (patch)
tree963ae9341cf38e17e185880cf45a0f9f931c5616
parent2561cb41c0da4522531af13b664373518e0b8008 (diff)
downloadtcl-fb0ed853e7c49ff24e17f4cb633876d0780b64b5.zip
tcl-fb0ed853e7c49ff24e17f4cb633876d0780b64b5.tar.gz
tcl-fb0ed853e7c49ff24e17f4cb633876d0780b64b5.tar.bz2
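Lowercase on demand (the scanned string is no longer lower-cased up front): the string index tree can now search case-insensitively, and clock scan now takes the UTF-8 character length into account for word tokens when parsing the format.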
-rw-r--r--generic/tclClockFmt.c17
-rw-r--r--generic/tclStrIdxTree.c20
-rw-r--r--generic/tclStrIdxTree.h35
3 files changed, 52 insertions, 20 deletions
diff --git a/generic/tclClockFmt.c b/generic/tclClockFmt.c
index e66c525..92040d8 100644
--- a/generic/tclClockFmt.c
+++ b/generic/tclClockFmt.c
@@ -1245,7 +1245,7 @@ ClockGetOrParseScanFormat(
fss->scnTok = tok = ckalloc(sizeof(*tok) * fss->scnTokC);
memset(tok, 0, sizeof(*(tok)));
- for (p = strFmt; p != e; p++) {
+ for (p = strFmt; p < e;) {
switch (*p) {
case '%':
if (1) {
@@ -1265,6 +1265,7 @@ ClockGetOrParseScanFormat(
tok->tokWord.start = p;
tok->tokWord.end = p+1;
AllocTokenInChain(tok, fss->scnTok, fss->scnTokC);
+ p++;
continue;
break;
case 'E':
@@ -1315,6 +1316,8 @@ ClockGetOrParseScanFormat(
}
/* next token */
AllocTokenInChain(tok, fss->scnTok, fss->scnTokC);
+ p++;
+ continue;
}
break;
case ' ':
@@ -1325,6 +1328,8 @@ ClockGetOrParseScanFormat(
}
tok->map = &ScnSpecTokenMap[cp - ScnSpecTokenMapIndex];
AllocTokenInChain(tok, fss->scnTok, fss->scnTokC);
+ p++;
+ continue;
break;
default:
word_tok:
@@ -1339,12 +1344,11 @@ word_tok:
wordTok->map = &ScnWordTokenMap;
AllocTokenInChain(tok, fss->scnTok, fss->scnTokC);
}
- continue;
}
break;
}
- continue;
+ p = TclUtfNext(p);
}
/* calculate end distance value for each tokens */
@@ -1468,11 +1472,6 @@ ClockScan(
yyMeridian = MER24;
- /* lower case given string into new object */
- strObj = Tcl_NewStringObj(TclGetString(strObj), strObj->length);
- Tcl_IncrRefCount(strObj);
- strObj->length = Tcl_UtfToLower(TclGetString(strObj));
-
p = TclGetString(strObj);
end = p + strObj->length;
/* in strict mode - bypass spaces at begin / end only (not between tokens) */
@@ -1726,8 +1725,6 @@ not_match:
done:
- Tcl_DecrRefCount(strObj);
-
return ret;
}
diff --git a/generic/tclStrIdxTree.c b/generic/tclStrIdxTree.c
index f078c7a..afb53e5 100644
--- a/generic/tclStrIdxTree.c
+++ b/generic/tclStrIdxTree.c
@@ -84,7 +84,7 @@ TclStrIdxTreeSearch(
{
TclStrIdxTree *parent = tree, *prevParent = tree;
TclStrIdx *item = tree->firstPtr, *prevItem = NULL;
- const char *s = start, *e, *cin, *preve;
+ const char *s = start, *f, *cin, *cinf, *prevf;
int offs = 0;
if (item == NULL) {
@@ -94,23 +94,23 @@ TclStrIdxTreeSearch(
/* search in tree */
do {
cin = TclGetString(item->key) + offs;
- e = TclUtfFindEqual(s, end, cin, cin + item->length);
+ f = TclUtfFindEqualNCInLwr(s, end, cin, cin + item->length, &cinf);
/* if something was found */
- if (e > s) {
+ if (f > s) {
/* if whole string was found */
- if (e >= end) {
- start = e;
+ if (f >= end) {
+ start = f;
goto done;
};
/* set new offset and shift start string */
- offs += (e - s);
- s = e;
+ offs += cinf - cin;
+ s = f;
/* if match item, go deeper as long as possible */
if (offs >= item->length && item->childTree.firstPtr) {
/* save previuosly found item (if not ambigous) for
* possible fallback (few greedy match) */
if (item->value != -1) {
- preve = e;
+ prevf = f;
prevItem = item;
prevParent = parent;
}
@@ -119,7 +119,7 @@ TclStrIdxTreeSearch(
continue;
}
/* no children - return this item and current chars found */
- start = e;
+ start = f;
goto done;
}
@@ -131,7 +131,7 @@ TclStrIdxTreeSearch(
if (prevItem != NULL) {
item = prevItem;
parent = prevParent;
- start = preve;
+ start = prevf;
}
done:
diff --git a/generic/tclStrIdxTree.h b/generic/tclStrIdxTree.h
index e80d3db..d2d6f0b 100644
--- a/generic/tclStrIdxTree.h
+++ b/generic/tclStrIdxTree.h
@@ -89,6 +89,41 @@ TclUtfFindEqualNC(
return ret;
}
+/*
+ * TclUtfFindEqualNCInLwr --
+ *
+ *	Compares the UTF-8 string [cs, cse) with the UTF-8 string [cin, cine)
+ *	character by character. Case is ignored on the cs side only: a
+ *	character of cs matches if it equals the character of cin either as
+ *	is or after Tcl_UniCharToLower. The caller is expected to supply cin
+ *	already in lower case.
+ *
+ * Results:
+ *	Returns the position in cs just behind the last matching character
+ *	(cs itself if nothing matches or if one of the ranges is empty). The
+ *	corresponding position in cin is stored in *cinfnd.
+ *
+ * NOTE(review): plain "inline" in a header has compiler-dependent linkage
+ * semantics (C99 vs. gnu89) - confirm against the build flags in use.
+ */
+inline const char *
+TclUtfFindEqualNCInLwr(
+    register const char *cs,	/* UTF string (in anycase) to find in cin. */
+    register const char *cse,	/* End of cs */
+    register const char *cin,	/* UTF string (in lowercase) will be browsed. */
+    register const char *cine,	/* End of cin */
+    const char **cinfnd)	/* Return position in cin */
+{
+    register const char *ret = cs;
+    Tcl_UniChar ch1, ch2;
+
+    /* Always define the output, even if not a single character matches. */
+    *cinfnd = cin;
+
+    /* Test the bounds first, so that an empty range is never decoded. */
+    while (cs < cse && cin < cine) {
+	cs += TclUtfToUniChar(cs, &ch1);
+	cin += TclUtfToUniChar(cin, &ch2);
+	if (ch1 != ch2) {
+	    /* cin is lower case already - fold the cs side only. */
+	    ch1 = Tcl_UniCharToLower(ch1);
+	    if (ch1 != ch2) {
+		break;
+	    }
+	}
+	/* Both characters match - remember the positions behind them. */
+	ret = cs;
+	*cinfnd = cin;
+    }
+    return ret;
+}
+
+/*
+ * TclUtfNext --
+ *
+ *	Advances to the next character of a UTF-8 string.
+ *
+ * Results:
+ *	Returns a pointer just behind the character that starts at src. For
+ *	a byte below 0xC0 (ASCII, or a UTF-8 continuation byte) this is
+ *	src + 1; otherwise src is advanced by the length returned by
+ *	TclUtfToUniChar.
+ */
+inline char *
+TclUtfNext(
+    register const char *src)	/* The current location in the string. */
+{
+    if (((unsigned char) *(src)) < 0xC0) {
+	/* Explicit cast: the return type drops the const qualifier. */
+	return (char *) (src + 1);
+    } else {
+	Tcl_UniChar ch;
+
+	return (char *) (src + TclUtfToUniChar(src, &ch));
+    }
+}
+
+
/*
* Primitives to safe set, reset and free references.
*/