From fb0ed853e7c49ff24e17f4cb633876d0780b64b5 Mon Sep 17 00:00:00 2001
From: sebres
Date: Tue, 10 Jan 2017 22:38:22 +0000
Subject: lowercase on demand, string index tree can search any-case now, clock scan considered utf-8 char length in words by format parsing

---
Review notes (text in this section is not part of the commit message):
 - tclStrIdxTree.h: the new helpers are declared "static inline", so that
   every file including the header gets its own private copy.
 - tclStrIdxTree.h: TclUtfNext casts its result explicitly instead of
   returning a const-qualified pointer from a function declared "char *".
 - tclStrIdxTree.h: TclUtfFindEqualNCInLwr stores a start value in *cinfnd,
   so the output is defined even if the first character mismatches.
 - tclStrIdxTree.c: the end of the key handed to TclUtfFindEqualNCInLwr is
   reduced by offs, because cin is already advanced by offs.
 - Indentation and blank context lines were re-created from a copy with
   collapsed whitespace; verify them against the tree before applying.

 generic/tclClockFmt.c   | 17 +++++++----------
 generic/tclStrIdxTree.c | 21 +++++++++++----------
 generic/tclStrIdxTree.h | 67 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 85 insertions(+), 20 deletions(-)

diff --git a/generic/tclClockFmt.c b/generic/tclClockFmt.c
index e66c525..92040d8 100644
--- a/generic/tclClockFmt.c
+++ b/generic/tclClockFmt.c
@@ -1245,7 +1245,7 @@ ClockGetOrParseScanFormat(
     fss->scnTok = tok = ckalloc(sizeof(*tok) * fss->scnTokC);
     memset(tok, 0, sizeof(*(tok)));
 
-    for (p = strFmt; p != e; p++) {
+    for (p = strFmt; p < e;) {
         switch (*p) {
         case '%':
             if (1) {
@@ -1265,6 +1265,7 @@ ClockGetOrParseScanFormat(
                 tok->tokWord.start = p;
                 tok->tokWord.end = p+1;
                 AllocTokenInChain(tok, fss->scnTok, fss->scnTokC);
+                p++;
                 continue;
             break;
             case 'E':
@@ -1315,6 +1316,8 @@ ClockGetOrParseScanFormat(
                 }
                 /* next token */
                 AllocTokenInChain(tok, fss->scnTok, fss->scnTokC);
+                p++;
+                continue;
             }
         break;
         case ' ':
@@ -1325,6 +1328,8 @@ ClockGetOrParseScanFormat(
             }
             tok->map = &ScnSpecTokenMap[cp - ScnSpecTokenMapIndex];
             AllocTokenInChain(tok, fss->scnTok, fss->scnTokC);
+            p++;
+            continue;
         break;
         default:
 word_tok:
@@ -1339,12 +1344,11 @@ word_tok:
                 wordTok->map = &ScnWordTokenMap;
                 AllocTokenInChain(tok, fss->scnTok, fss->scnTokC);
             }
-            continue;
         }
         break;
         }
 
-        continue;
+        p = TclUtfNext(p);
     }
 
     /* calculate end distance value for each tokens */
@@ -1468,11 +1472,6 @@ ClockScan(
 
     yyMeridian = MER24;
 
-    /* lower case given string into new object */
-    strObj = Tcl_NewStringObj(TclGetString(strObj), strObj->length);
-    Tcl_IncrRefCount(strObj);
-    strObj->length = Tcl_UtfToLower(TclGetString(strObj));
-
     p = TclGetString(strObj);
     end = p + strObj->length;
     /* in strict mode - bypass spaces at begin / end only (not between tokens) */
@@ -1726,8 +1725,6 @@ not_match:
 
 done:
 
-    Tcl_DecrRefCount(strObj);
-
     return ret;
 }
 
diff --git a/generic/tclStrIdxTree.c b/generic/tclStrIdxTree.c
index f078c7a..afb53e5 100644
--- a/generic/tclStrIdxTree.c
+++ b/generic/tclStrIdxTree.c
@@ -84,7 +84,7 @@ TclStrIdxTreeSearch(
 {
     TclStrIdxTree *parent = tree, *prevParent = tree;
     TclStrIdx *item = tree->firstPtr, *prevItem = NULL;
-    const char *s = start, *e, *cin, *preve;
+    const char *s = start, *f, *cin, *cinf, *prevf;
     int offs = 0;
 
     if (item == NULL) {
@@ -94,23 +94,24 @@ TclStrIdxTreeSearch(
     /* search in tree */
     do {
         cin = TclGetString(item->key) + offs;
-        e = TclUtfFindEqual(s, end, cin, cin + item->length);
+        /* cin is already advanced by offs - stop at the real end of the key */
+        f = TclUtfFindEqualNCInLwr(s, end, cin, cin + item->length - offs, &cinf);
         /* if something was found */
-        if (e > s) {
+        if (f > s) {
             /* if whole string was found */
-            if (e >= end) {
-                start = e;
+            if (f >= end) {
+                start = f;
                 goto done;
             };
             /* set new offset and shift start string */
-            offs += (e - s);
-            s = e;
+            offs += cinf - cin;
+            s = f;
             /* if match item, go deeper as long as possible */
             if (offs >= item->length && item->childTree.firstPtr) {
                 /* save previuosly found item (if not ambigous) for
                  * possible fallback (few greedy match) */
                 if (item->value != -1) {
-                    preve = e;
+                    prevf = f;
                     prevItem = item;
                     prevParent = parent;
                 }
@@ -119,7 +120,7 @@ TclStrIdxTreeSearch(
                 continue;
             }
             /* no children - return this item and current chars found */
-            start = e;
+            start = f;
             goto done;
         }
 
@@ -131,7 +132,7 @@ TclStrIdxTreeSearch(
     if (prevItem != NULL) {
         item = prevItem;
         parent = prevParent;
-        start = preve;
+        start = prevf;
     }
 
 done:
diff --git a/generic/tclStrIdxTree.h b/generic/tclStrIdxTree.h
index e80d3db..d2d6f0b 100644
--- a/generic/tclStrIdxTree.h
+++ b/generic/tclStrIdxTree.h
@@ -89,6 +89,73 @@ TclUtfFindEqualNC(
     return ret;
 }
 
+/*
+ * TclUtfFindEqualNCInLwr --
+ *
+ *      Finds how far the any-case UTF-8 string [cs, cse) matches the
+ *      lowercase UTF-8 string [cin, cine), ignoring the case of cs.
+ *      Each character of cs is compared as is and, if that fails,
+ *      once more after Tcl_UniCharToLower.
+ *
+ *      The first character of both strings is decoded before any bounds
+ *      check is made, so neither range may be empty.
+ *
+ * Results:
+ *      Pointer just behind the last matching character of cs (cs itself
+ *      if nothing matched). *cinfnd receives the matching position in
+ *      cin (cin itself if nothing matched).
+ */
+
+static inline const char *
+TclUtfFindEqualNCInLwr(
+    register const char *cs,    /* UTF string (in any case) to find in cin. */
+    register const char *cse,   /* End of cs */
+    register const char *cin,   /* UTF string (in lowercase) to be browsed. */
+    register const char *cine,  /* End of cin */
+    const char **cinfnd)        /* Receives the position reached in cin */
+{
+    register const char *ret = cs;
+    Tcl_UniChar ch1, ch2;
+
+    /* Give the caller a defined position even if nothing matches. */
+    *cinfnd = cin;
+    do {
+        cs += TclUtfToUniChar(cs, &ch1);
+        cin += TclUtfToUniChar(cin, &ch2);
+        if (ch1 != ch2) {
+            ch1 = Tcl_UniCharToLower(ch1);
+            if (ch1 != ch2) break;
+        }
+        *cinfnd = cin;
+    } while ((ret = cs) < cse && cin < cine);
+    return ret;
+}
+
+/*
+ * TclUtfNext --
+ *
+ *      Steps to the next character of a UTF-8 string. A byte below 0xC0
+ *      is skipped as a single byte without decoding; otherwise the
+ *      length of the character is taken from TclUtfToUniChar.
+ *
+ * Results:
+ *      Pointer to the byte following the character at src.
+ */
+
+static inline char *
+TclUtfNext(
+    register const char *src)   /* The current location in the string. */
+{
+    if (((unsigned char) *(src)) < 0xC0) {
+        /* Cast explicitly: the return type is not const-qualified. */
+        return (char *) (src + 1);
+    } else {
+        Tcl_UniChar ch;
+        return (char *) (src + TclUtfToUniChar(src, &ch));
+    }
+}
+
+
 /*
  * Primitives to safe set, reset and free references.
  */
-- 
cgit v0.12