summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorsebres <sebres@users.sourceforge.net>2017-01-10 22:37:40 (GMT)
committersebres <sebres@users.sourceforge.net>2017-01-10 22:37:40 (GMT)
commit2561cb41c0da4522531af13b664373518e0b8008 (patch)
treea3a05ab02ffb63422c5dad4dfd40a9259bc9d39e
parent6a0616f572c4e550427bf6b6e80b874166c5e3fe (diff)
downloadtcl-2561cb41c0da4522531af13b664373518e0b8008.zip
tcl-2561cb41c0da4522531af13b664373518e0b8008.tar.gz
tcl-2561cb41c0da4522531af13b664373518e0b8008.tar.bz2
string index tree for fast greedy search of the string (index) by unique string prefix as key;
clock scan rewritten to use string index tries search;
-rw-r--r--generic/tclClock.c74
-rw-r--r--generic/tclClockFmt.c444
-rw-r--r--generic/tclDate.h22
-rw-r--r--generic/tclStrIdxTree.c519
-rw-r--r--generic/tclStrIdxTree.h134
-rw-r--r--unix/Makefile.in4
-rw-r--r--win/Makefile.in1
-rw-r--r--win/makefile.vc1
8 files changed, 865 insertions, 334 deletions
diff --git a/generic/tclClock.c b/generic/tclClock.c
index 0c08391..e52b2e7 100644
--- a/generic/tclClock.c
+++ b/generic/tclClock.c
@@ -15,6 +15,7 @@
*/
#include "tclInt.h"
+#include "tclStrIdxTree.h"
#include "tclDate.h"
/*
@@ -140,6 +141,10 @@ static unsigned long TzsetGetEpoch(void);
static void TzsetIfNecessary(void);
static void ClockDeleteCmdProc(ClientData);
+static int ClockTestObjCmd(
+ ClientData clientData, Tcl_Interp *interp,
+ int objc, Tcl_Obj *const objv[]);
+
/*
* Structure containing description of "native" clock commands to create.
*/
@@ -169,6 +174,7 @@ static const struct ClockCommand clockCommands[] = {
{ "GetJulianDayFromEraYearWeekDay",
ClockGetjuliandayfromerayearweekdayObjCmd },
{ "ParseFormatArgs", ClockParseformatargsObjCmd },
+ { "_test", TclStrIdxTreeTestObjCmd },
{ NULL, NULL }
};
@@ -584,7 +590,7 @@ ClockMCGet(
}
MODULE_SCOPE Tcl_Obj *
-ClockMCGetListIdxDict(
+ClockMCGetIdx(
ClockFmtScnCmdArgs *opts,
int mcKey)
{
@@ -598,53 +604,45 @@ ClockMCGetListIdxDict(
return NULL;
}
- /* try to get indices dictionray,
- * if not available - create from list */
+ /* try to get indices object */
+ if (dataPtr->mcLitIdxs == NULL) {
+ return NULL;
+ }
if (Tcl_DictObjGet(NULL, opts->mcDictObj,
dataPtr->mcLitIdxs[mcKey], &valObj) != TCL_OK
) {
- Tcl_Obj **lstv, *intObj;
- int i, lstc;
+ return NULL;
+ }
- if (dataPtr->mcLitIdxs == NULL) {
- dataPtr->mcLitIdxs = ckalloc(MCLIT__END * sizeof(Tcl_Obj*));
- for (i = 0; i < MCLIT__END; ++i) {
- Tcl_InitObjRef(dataPtr->mcLitIdxs[i],
- Tcl_NewStringObj(MsgCtLitIdxs[i], -1));
- }
- }
+ return valObj;
+}
- if (Tcl_DictObjGet(opts->interp, opts->mcDictObj,
- dataPtr->mcLiterals[mcKey], &valObj) != TCL_OK) {
- return NULL;
- };
- if (TclListObjGetElements(opts->interp, valObj,
- &lstc, &lstv) != TCL_OK) {
- return NULL;
- };
+MODULE_SCOPE int
+ClockMCSetIdx(
+ ClockFmtScnCmdArgs *opts,
+ int mcKey, Tcl_Obj *valObj)
+{
+ ClockClientData *dataPtr = opts->clientData;
- valObj = Tcl_NewDictObj();
- for (i = 0; i < lstc; i++) {
- intObj = Tcl_NewIntObj(i);
- if (Tcl_DictObjPut(opts->interp, valObj,
- lstv[i], intObj) != TCL_OK
- ) {
- Tcl_DecrRefCount(valObj);
- Tcl_DecrRefCount(intObj);
- return NULL;
- }
- };
+ if (opts->mcDictObj == NULL) {
+ ClockMCDict(opts);
+ if (opts->mcDictObj == NULL)
+ return TCL_ERROR;
+ }
- if (Tcl_DictObjPut(opts->interp, opts->mcDictObj,
- dataPtr->mcLitIdxs[mcKey], valObj) != TCL_OK
- ) {
- Tcl_DecrRefCount(valObj);
- return NULL;
+ /* if literal storage for indices not yet created */
+ if (dataPtr->mcLitIdxs == NULL) {
+ int i;
+ dataPtr->mcLitIdxs = ckalloc(MCLIT__END * sizeof(Tcl_Obj*));
+ for (i = 0; i < MCLIT__END; ++i) {
+ Tcl_InitObjRef(dataPtr->mcLitIdxs[i],
+ Tcl_NewStringObj(MsgCtLitIdxs[i], -1));
}
- };
+ }
- return valObj;
+ return Tcl_DictObjPut(opts->interp, opts->mcDictObj,
+ dataPtr->mcLitIdxs[mcKey], valObj);
}
/*
diff --git a/generic/tclClockFmt.c b/generic/tclClockFmt.c
index 5469ee1..e66c525 100644
--- a/generic/tclClockFmt.c
+++ b/generic/tclClockFmt.c
@@ -11,6 +11,7 @@
*/
#include "tclInt.h"
+#include "tclStrIdxTree.h"
#include "tclDate.h"
/*
@@ -33,6 +34,9 @@ static void ClockFmtScnStorageDelete(ClockFmtScnStorage *fss);
static void ClockFrmScnFinalize(ClientData clientData);
+/* Msgcat index literals prefixed with _IDX_, used for quick dictionary search */
+CLOCK_LOCALE_LITERAL_ARRAY(MsgCtLitIdxs, "_IDX_");
+
/*
* Clock scan and format facilities.
*/
@@ -583,6 +587,139 @@ LocaleListSearch(ClockFmtScnCmdArgs *opts,
minLen, maxLen);
}
+static TclStrIdxTree *
+ClockMCGetListIdxTree(
+ ClockFmtScnCmdArgs *opts,
+ int mcKey)
+{
+ TclStrIdxTree * idxTree;
+ Tcl_Obj *objPtr = ClockMCGetIdx(opts, mcKey);
+ if ( objPtr != NULL
+ && (idxTree = TclStrIdxTreeGetFromObj(objPtr)) != NULL
+ ) {
+ return idxTree;
+
+ } else {
+ /* build new index */
+
+ Tcl_Obj **lstv;
+ int lstc;
+ Tcl_Obj *valObj;
+
+ objPtr = TclStrIdxTreeNewObj();
+ if ((idxTree = TclStrIdxTreeGetFromObj(objPtr)) == NULL) {
+ goto done; /* unexpected, but ...*/
+ }
+
+ valObj = ClockMCGet(opts, mcKey);
+ if (valObj == NULL) {
+ goto done;
+ }
+
+ if (TclListObjGetElements(opts->interp, valObj,
+ &lstc, &lstv) != TCL_OK) {
+ goto done;
+ };
+
+ if (TclStrIdxTreeBuildFromList(idxTree, lstc, lstv) != TCL_OK) {
+ goto done;
+ }
+
+ ClockMCSetIdx(opts, mcKey, objPtr);
+ objPtr = NULL;
+ };
+
+done:
+ if (objPtr) {
+ Tcl_DecrRefCount(objPtr);
+ idxTree = NULL;
+ }
+
+ return idxTree;
+}
+
+static TclStrIdxTree *
+ClockMCGetMultiListIdxTree(
+ ClockFmtScnCmdArgs *opts,
+ int mcKey,
+ int *mcKeys)
+{
+ TclStrIdxTree * idxTree;
+ Tcl_Obj *objPtr = ClockMCGetIdx(opts, mcKey);
+ if ( objPtr != NULL
+ && (idxTree = TclStrIdxTreeGetFromObj(objPtr)) != NULL
+ ) {
+ return idxTree;
+
+ } else {
+ /* build new index */
+
+ Tcl_Obj **lstv;
+ int lstc;
+ Tcl_Obj *valObj;
+
+ objPtr = TclStrIdxTreeNewObj();
+ if ((idxTree = TclStrIdxTreeGetFromObj(objPtr)) == NULL) {
+ goto done; /* unexpected, but ...*/
+ }
+
+ while (*mcKeys) {
+
+ valObj = ClockMCGet(opts, *mcKeys);
+ if (valObj == NULL) {
+ goto done;
+ }
+
+ if (TclListObjGetElements(opts->interp, valObj,
+ &lstc, &lstv) != TCL_OK) {
+ goto done;
+ };
+
+ if (TclStrIdxTreeBuildFromList(idxTree, lstc, lstv) != TCL_OK) {
+ goto done;
+ }
+ mcKeys++;
+ }
+
+ ClockMCSetIdx(opts, mcKey, objPtr);
+ };
+
+done:
+ if (objPtr) {
+ Tcl_DecrRefCount(objPtr);
+ idxTree = NULL;
+ }
+
+ return idxTree;
+}
+
+inline int
+ClockStrIdxTreeSearch(ClockFmtScnCmdArgs *opts,
+ DateInfo *info, TclStrIdxTree *idxTree, int *val,
+ int minLen, int maxLen)
+{
+ const char *f;
+ TclStrIdx *foundItem;
+ f = TclStrIdxTreeSearch(NULL, &foundItem, idxTree,
+ yyInput, yyInput + maxLen);
+
+ if (f <= yyInput || (f - yyInput) < minLen) {
+ /* not found */
+ return TCL_RETURN;
+ }
+ if (foundItem->value == -1) {
+ /* ambigous */
+ return TCL_RETURN;
+ }
+
+ *val = foundItem->value;
+
+ /* shift input pointer */
+ yyInput = f;
+
+ return TCL_OK;
+}
+
static int
StaticListSearch(ClockFmtScnCmdArgs *opts,
DateInfo *info, const char **lst, int *val)
@@ -612,21 +749,19 @@ FindWordEnd(
register const char * p, const char * end)
{
register const char *x = tok->tokWord.start;
- if (x == tok->tokWord.end) { /* single char word */
- if (*p != *x) {
- /* no match -> error */
- return NULL;
+ const char *pfnd;
+ if (x == tok->tokWord.end - 1) { /* fast phase-out for single char word */
+ if (*p == *x) {
+ return ++p;
}
- return ++p;
}
/* multi-char word */
- do
- if (*p++ != *x++) {
- /* no match -> error */
- return NULL;
- }
- while (x <= tok->tokWord.end && p < end);
- return p;
+ x = TclUtfFindEqualNC(x, tok->tokWord.end, p, end, &pfnd);
+ if (x < tok->tokWord.end) {
+ /* no match -> error */
+ return NULL;
+ }
+ return pfnd;
}
static int
@@ -822,11 +957,18 @@ ClockScnToken_LocaleListMatcher_Proc(ClockFmtScnCmdArgs *opts,
{
int ret, val;
int minLen, maxLen;
+ TclStrIdxTree *idxTree;
DetermineGreedySearchLen(opts, info, tok, &minLen, &maxLen);
- ret = LocaleListSearch(opts, info, (int)tok->map->data, &val,
- minLen, maxLen);
+ /* get or create tree in msgcat dict */
+
+ idxTree = ClockMCGetListIdxTree(opts, (int)tok->map->data /* mcKey */);
+ if (idxTree == NULL) {
+ return TCL_ERROR;
+ }
+
+ ret = ClockStrIdxTreeSearch(opts, info, idxTree, &val, minLen, maxLen);
if (ret != TCL_OK) {
return ret;
}
@@ -1120,7 +1262,8 @@ ClockGetOrParseScanFormat(
/* begin new word token - don't join with previous word token,
* because current mapping should be "...%%..." -> "...%..." */
tok->map = &ScnWordTokenMap;
- tok->tokWord.start = tok->tokWord.end = p;
+ tok->tokWord.start = p;
+ tok->tokWord.end = p+1;
AllocTokenInChain(tok, fss->scnTok, fss->scnTokC);
continue;
break;
@@ -1190,7 +1333,7 @@ word_tok:
if (tok > fss->scnTok && (tok-1)->map == &ScnWordTokenMap) {
wordTok = tok-1;
}
- wordTok->tokWord.end = p;
+ wordTok->tokWord.end = p+1;
if (wordTok == tok) {
wordTok->tokWord.start = p;
wordTok->map = &ScnWordTokenMap;
@@ -1214,7 +1357,7 @@ word_tok:
if (prevTok->map->type != CTOKT_WORD) {
endDist += prevTok->map->minSize;
} else {
- endDist += prevTok->tokWord.end - prevTok->tokWord.start + 1;
+ endDist += prevTok->tokWord.end - prevTok->tokWord.start;
}
prevTok--;
}
@@ -1325,6 +1468,11 @@ ClockScan(
yyMeridian = MER24;
+ /* lower case given string into new object */
+ strObj = Tcl_NewStringObj(TclGetString(strObj), strObj->length);
+ Tcl_IncrRefCount(strObj);
+ strObj->length = Tcl_UtfToLower(TclGetString(strObj));
+
p = TclGetString(strObj);
end = p + strObj->length;
/* in strict mode - bypass spaces at begin / end only (not between tokens) */
@@ -1578,6 +1726,8 @@ not_match:
done:
+ Tcl_DecrRefCount(strObj);
+
return ret;
}
@@ -1604,266 +1754,6 @@ ClockFormat(
return TCL_ERROR;
}
*/
-#if 0
- /* prepare parsing */
-
- yyMeridian = MER24;
-
- p = TclGetString(strObj);
- end = p + strObj->length;
- /* in strict mode - bypass spaces at begin / end only (not between tokens) */
- if (opts->flags & CLF_STRICT) {
- while (p < end && isspace(UCHAR(*p))) {
- p++;
- }
- }
- info->dateStart = yyInput = p;
- info->dateEnd = end;
-
- /* parse string */
- for (; tok->map != NULL; tok++) {
- map = tok->map;
- /* bypass spaces at begin of input before parsing each token */
- if ( !(opts->flags & CLF_STRICT)
- && (map->type != CTOKT_SPACE && map->type != CTOKT_WORD)
- ) {
- while (p < end && isspace(UCHAR(*p))) {
- p++;
- }
- }
- yyInput = p;
- switch (map->type)
- {
- case CTOKT_DIGIT:
- if (1) {
- int size = map->maxSize;
- int sign = 1;
- if (map->flags & CLF_SIGNED) {
- if (*p == '+') { yyInput = ++p; }
- else
- if (*p == '-') { yyInput = ++p; sign = -1; };
- }
- /* greedy find digits (look for forward digits consider spaces),
- * corresponding pre-calculated lookAhead */
- if (size != map->minSize && tok->lookAhead) {
- int spcnt = 0;
- const char *pe;
- size += tok->lookAhead;
- x = p + size; if (x > end) { x = end; };
- pe = x;
- while (p < x) {
- if (isspace(UCHAR(*p))) {
- if (pe > p) { pe = p; };
- if (x < end) x++;
- p++;
- spcnt++;
- continue;
- }
- if (isdigit(UCHAR(*p))) {
- p++;
- continue;
- }
- break;
- }
- /* consider reserved (lookAhead) for next tokens */
- p -= tok->lookAhead + spcnt;
- if (p > pe) {
- p = pe;
- }
- } else {
- x = p + size; if (x > end) { x = end; };
- while (isdigit(UCHAR(*p)) && p < x) { p++; };
- }
- size = p - yyInput;
- if (size < map->minSize) {
- /* missing input -> error */
- goto not_match;
- }
- /* string 2 number, put number into info structure by offset */
- p = yyInput; x = p + size;
- if (!(map->flags & CLF_LOCALSEC)) {
- if (_str2int((time_t *)(((char *)info) + map->offs),
- p, x, sign) != TCL_OK) {
- goto overflow;
- }
- p = x;
- } else {
- if (_str2wideInt((Tcl_WideInt *)(((char *)info) + map->offs),
- p, x, sign) != TCL_OK) {
- goto overflow;
- }
- p = x;
- }
- flags = (flags & ~map->clearFlags) | map->flags;
- }
- break;
- case CTOKT_PARSER:
- switch (map->parser(opts, info, tok)) {
- case TCL_OK:
- break;
- case TCL_RETURN:
- goto not_match;
- break;
- default:
- goto done;
- break;
- };
- p = yyInput;
- flags = (flags & ~map->clearFlags) | map->flags;
- break;
- case CTOKT_SPACE:
- /* at least one space in strict mode */
- if (opts->flags & CLF_STRICT) {
- if (!isspace(UCHAR(*p))) {
- /* unmatched -> error */
- goto not_match;
- }
- p++;
- }
- while (p < end && isspace(UCHAR(*p))) {
- p++;
- }
- break;
- case CTOKT_WORD:
- x = FindWordEnd(tok, p, end);
- if (!x) {
- /* no match -> error */
- goto not_match;
- }
- p = x;
- continue;
- break;
- }
- }
-
- /* ignore spaces at end */
- while (p < end && isspace(UCHAR(*p))) {
- p++;
- }
- /* check end was reached */
- if (p < end) {
- /* something after last token - wrong format */
- goto not_match;
- }
-
- /*
- * Invalidate result
- */
-
- /* seconds token (%s) take precedence over all other tokens */
- if ((opts->flags & CLF_EXTENDED) || !(flags & CLF_LOCALSEC)) {
- if (flags & CLF_DATE) {
-
- if (!(flags & CLF_JULIANDAY)) {
- info->flags |= CLF_ASSEMBLE_SECONDS|CLF_ASSEMBLE_JULIANDAY;
-
- /* dd precedence below ddd */
- switch (flags & (CLF_MONTH|CLF_DAYOFYEAR|CLF_DAYOFMONTH)) {
- case (CLF_DAYOFYEAR|CLF_DAYOFMONTH):
- /* miss month: ddd over dd (without month) */
- flags &= ~CLF_DAYOFMONTH;
- case (CLF_DAYOFYEAR):
- /* ddd over naked weekday */
- if (!(flags & CLF_ISO8601YEAR)) {
- flags &= ~CLF_ISO8601;
- }
- break;
- case (CLF_MONTH|CLF_DAYOFYEAR|CLF_DAYOFMONTH):
- /* both available: mmdd over ddd */
- flags &= ~CLF_DAYOFYEAR;
- case (CLF_MONTH|CLF_DAYOFMONTH):
- case (CLF_DAYOFMONTH):
- /* mmdd / dd over naked weekday */
- if (!(flags & CLF_ISO8601YEAR)) {
- flags &= ~CLF_ISO8601;
- }
- break;
- }
-
- /* YearWeekDay below YearMonthDay */
- if ( (flags & CLF_ISO8601)
- && ( (flags & (CLF_YEAR|CLF_DAYOFYEAR)) == (CLF_YEAR|CLF_DAYOFYEAR)
- || (flags & (CLF_YEAR|CLF_DAYOFMONTH|CLF_MONTH)) == (CLF_YEAR|CLF_DAYOFMONTH|CLF_MONTH)
- )
- ) {
- /* yy precedence below yyyy */
- if (!(flags & CLF_ISO8601CENTURY) && (flags & CLF_CENTURY)) {
- /* normally precedence of ISO is higher, but no century - so put it down */
- flags &= ~CLF_ISO8601;
- }
- else
- /* yymmdd or yyddd over naked weekday */
- if (!(flags & CLF_ISO8601YEAR)) {
- flags &= ~CLF_ISO8601;
- }
- }
-
- if (!(flags & CLF_ISO8601)) {
- if (yyYear < 100) {
- if (!(flags & CLF_CENTURY)) {
- if (yyYear >= dataPtr->yearOfCenturySwitch) {
- yyYear -= 100;
- }
- yyYear += dataPtr->currentYearCentury;
- } else {
- yyYear += info->dateCentury * 100;
- }
- }
- } else {
- if (info->date.iso8601Year < 100) {
- if (!(flags & CLF_ISO8601CENTURY)) {
- if (info->date.iso8601Year >= dataPtr->yearOfCenturySwitch) {
- info->date.iso8601Year -= 100;
- }
- info->date.iso8601Year += dataPtr->currentYearCentury;
- } else {
- info->date.iso8601Year += info->dateCentury * 100;
- }
- }
- }
- }
- }
-
- /* if no time - reset time */
- if (!(flags & (CLF_TIME|CLF_LOCALSEC))) {
- info->flags |= CLF_ASSEMBLE_SECONDS;
- yydate.localSeconds = 0;
- }
-
- if (flags & CLF_TIME) {
- info->flags |= CLF_ASSEMBLE_SECONDS;
- yySeconds = ToSeconds(yyHour, yyMinutes,
- yySeconds, yyMeridian);
- } else
- if (!(flags & CLF_LOCALSEC)) {
- info->flags |= CLF_ASSEMBLE_SECONDS;
- yySeconds = yydate.localSeconds % SECONDS_PER_DAY;
- }
- }
-
- /* tell caller which flags were set */
- info->flags |= flags;
-
- ret = TCL_OK;
- goto done;
-
-overflow:
-
- Tcl_SetResult(interp, "requested date too large to represent",
- TCL_STATIC);
- Tcl_SetErrorCode(interp, "CLOCK", "dateTooLarge", NULL);
- goto done;
-
-not_match:
-
- Tcl_SetResult(interp, "input string does not match supplied format",
- TCL_STATIC);
- Tcl_SetErrorCode(interp, "CLOCK", "badInputString", NULL);
-
-done:
-
- return ret;
-#endif
}
diff --git a/generic/tclDate.h b/generic/tclDate.h
index 112ed31..2728dd3 100644
--- a/generic/tclDate.h
+++ b/generic/tclDate.h
@@ -126,24 +126,6 @@ typedef enum ClockMsgCtLiteral {
}
/*
- * Primitives to safe set, reset and free references.
- */
-
-#define Tcl_UnsetObjRef(obj) \
- if (obj != NULL) { Tcl_DecrRefCount(obj); obj = NULL; }
-#define Tcl_InitObjRef(obj, val) \
- obj = val; if (obj) { Tcl_IncrRefCount(obj); }
-#define Tcl_SetObjRef(obj, val) \
-if (1) { \
- Tcl_Obj *nval = val; \
- if (obj != nval) { \
- Tcl_Obj *prev = obj; \
- Tcl_InitObjRef(obj, nval); \
- if (prev != NULL) { Tcl_DecrRefCount(prev); }; \
- } \
-}
-
-/*
* Structure containing the fields used in [clock format] and [clock scan]
*/
@@ -450,7 +432,9 @@ MODULE_SCOPE Tcl_Obj *
MODULE_SCOPE Tcl_Obj *
ClockMCGet(ClockFmtScnCmdArgs *opts, int mcKey);
MODULE_SCOPE Tcl_Obj *
- ClockMCGetListIdxDict(ClockFmtScnCmdArgs *opts, int mcKey);
+ ClockMCGetIdx(ClockFmtScnCmdArgs *opts, int mcKey);
+MODULE_SCOPE int ClockMCSetIdx(ClockFmtScnCmdArgs *opts, int mcKey,
+ Tcl_Obj *valObj);
/* tclClockFmt.c module declarations */
diff --git a/generic/tclStrIdxTree.c b/generic/tclStrIdxTree.c
new file mode 100644
index 0000000..f078c7a
--- /dev/null
+++ b/generic/tclStrIdxTree.c
@@ -0,0 +1,519 @@
+/*
+ * tclStrIdxTree.c --
+ *
+ * Contains the routines for managing string index tries in Tcl.
+ *
+ * This code is back-ported from the tclSE engine, by Serg G. Brester.
+ *
+ * Copyright (c) 2016 by Sergey G. Brester aka sebres. All rights reserved.
+ *
+ * See the file "license.terms" for information on usage and redistribution of
+ * this file, and for a DISCLAIMER OF ALL WARRANTIES.
+ *
+ * -----------------------------------------------------------------------
+ *
+ * String index tries are prepaired structures used for fast greedy search of the string
+ * (index) by unique string prefix as key.
+ *
+ * Index tree build for two lists together can be explained in the following datagram
+ *
+ * Lists:
+ *
+ * {Januar Februar Maerz April Mai Juni Juli August September Oktober November Dezember}
+ * {Jnr Fbr Mrz Apr Mai Jni Jli Agt Spt Okt Nvb Dzb}
+ *
+ * Index-Tree:
+ *
+ * j -1 * ...
+ * anuar 0 *
+ * u -1 * a -1
+ * ni 5 * pril 3
+ * li 6 * ugust 7
+ * n -1 * gt 7
+ * r 0 * s 8
+ * i 5 * eptember 8
+ * li 6 * pt 8
+ * f 1 * oktober 9
+ * ebruar 1 * n 10
+ * br 1 * ovember 10
+ * m -1 * vb 10
+ * a -1 * d 11
+ * erz 2 * ezember 11
+ * i 4 * zb 11
+ * rz 2 *
+ * ...
+ *
+ * Thereby value -1 shows pure group items (corresponding ambigous matches).
+ *
+ * StrIdxTree's are very fast, so:
+ * build of above-mentioned tree takes about 10 microseconds.
+ * search of string index in this tree takes fewer as 0.1 microseconds.
+ *
+ */
+
+#include "tclInt.h"
+#include "tclStrIdxTree.h"
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * TclStrIdxTreeSearch --
+ *
+ * Find largest part of string "start" in indexed tree (case sensitive).
+ *
+ * Also used for building of string index tree.
+ *
+ * Results:
+ * Return position of UTF character in start after last equal character
+ * and found item (with parent).
+ *
+ * Side effects:
+ * None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+MODULE_SCOPE const char*
+TclStrIdxTreeSearch(
+ TclStrIdxTree **foundParent, /* Return value of found sub tree (used for tree build) */
+ TclStrIdx **foundItem, /* Return value of found item */
+ TclStrIdxTree *tree, /* Index tree will be browsed */
+ const char *start, /* UTF string to find in tree */
+ const char *end) /* End of string */
+{
+ TclStrIdxTree *parent = tree, *prevParent = tree;
+ TclStrIdx *item = tree->firstPtr, *prevItem = NULL;
+ const char *s = start, *e, *cin, *preve;
+ int offs = 0;
+
+ if (item == NULL) {
+ goto done;
+ }
+
+ /* search in tree */
+ do {
+ cin = TclGetString(item->key) + offs;
+ e = TclUtfFindEqual(s, end, cin, cin + item->length);
+ /* if something was found */
+ if (e > s) {
+ /* if whole string was found */
+ if (e >= end) {
+ start = e;
+ goto done;
+ };
+ /* set new offset and shift start string */
+ offs += (e - s);
+ s = e;
+ /* if match item, go deeper as long as possible */
+ if (offs >= item->length && item->childTree.firstPtr) {
+ /* save previuosly found item (if not ambigous) for
+ * possible fallback (few greedy match) */
+ if (item->value != -1) {
+ preve = e;
+ prevItem = item;
+ prevParent = parent;
+ }
+ parent = &item->childTree;
+ item = item->childTree.firstPtr;
+ continue;
+ }
+ /* no children - return this item and current chars found */
+ start = e;
+ goto done;
+ }
+
+ item = item->nextPtr;
+
+ } while (item != NULL);
+
+ /* fallback (few greedy match) not ambigous (has a value) */
+ if (prevItem != NULL) {
+ item = prevItem;
+ parent = prevParent;
+ start = preve;
+ }
+
+done:
+
+ if (foundParent)
+ *foundParent = parent;
+ if (foundItem)
+ *foundItem = item;
+ return start;
+}
+
+MODULE_SCOPE void
+TclStrIdxTreeFree(
+ TclStrIdx *tree)
+{
+ while (tree != NULL) {
+ TclStrIdx *t;
+ Tcl_DecrRefCount(tree->key);
+ if (tree->childTree.firstPtr != NULL) {
+ TclStrIdxTreeFree(tree->childTree.firstPtr);
+ }
+ t = tree, tree = tree->nextPtr;
+ ckfree(t);
+ }
+}
+
+/*
+ * Several bidirectional list primitives
+ */
+inline void
+TclStrIdxTreeInsertBranch(
+ TclStrIdxTree *parent,
+ register TclStrIdx *item,
+ register TclStrIdx *child)
+{
+ if (parent->firstPtr == child)
+ parent->firstPtr = item;
+ if (parent->lastPtr == child)
+ parent->lastPtr = item;
+ if (item->nextPtr = child->nextPtr) {
+ item->nextPtr->prevPtr = item;
+ child->nextPtr = NULL;
+ }
+ if (item->prevPtr = child->prevPtr) {
+ item->prevPtr->nextPtr = item;
+ child->prevPtr = NULL;
+ }
+ item->childTree.firstPtr = child;
+ item->childTree.lastPtr = child;
+}
+
+inline void
+TclStrIdxTreeAppend(
+ register TclStrIdxTree *parent,
+ register TclStrIdx *item)
+{
+ if (parent->lastPtr != NULL) {
+ parent->lastPtr->nextPtr = item;
+ }
+ item->prevPtr = parent->lastPtr;
+ item->nextPtr = NULL;
+ parent->lastPtr = item;
+ if (parent->firstPtr == NULL) {
+ parent->firstPtr = item;
+ }
+}
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * TclStrIdxTreeBuildFromList --
+ *
+ * Build or extend string indexed tree from tcl list.
+ *
+ * Important: by multiple lists, optimal tree can be created only if list with
+ * larger strings used firstly.
+ *
+ * Results:
+ * Returns a standard Tcl result.
+ *
+ * Side effects:
+ * None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+MODULE_SCOPE int
+TclStrIdxTreeBuildFromList(
+ TclStrIdxTree *idxTree,
+ int lstc,
+ Tcl_Obj **lstv)
+{
+ Tcl_Obj **lwrv;
+ int i, ret = TCL_ERROR;
+ const char *s, *e, *f;
+ TclStrIdx *item;
+
+ /* create lowercase reflection of the list keys */
+
+ lwrv = ckalloc(sizeof(Tcl_Obj*) * lstc);
+ if (lwrv == NULL) {
+ return TCL_ERROR;
+ }
+ for (i = 0; i < lstc; i++) {
+ lwrv[i] = Tcl_DuplicateObj(lstv[i]);
+ if (lwrv[i] == NULL) {
+ return TCL_ERROR;
+ }
+ Tcl_IncrRefCount(lwrv[i]);
+ lwrv[i]->length = Tcl_UtfToLower(TclGetString(lwrv[i]));
+ }
+
+ /* build index tree of the list keys */
+ for (i = 0; i < lstc; i++) {
+ TclStrIdxTree *foundParent = idxTree;
+ e = s = TclGetString(lwrv[i]);
+ e += lwrv[i]->length;
+
+ /* ignore empty values (impossible to index it) */
+ if (lwrv[i]->length == 0) continue;
+
+ item = NULL;
+ if (idxTree->firstPtr != NULL) {
+ TclStrIdx *foundItem;
+ f = TclStrIdxTreeSearch(&foundParent, &foundItem,
+ idxTree, s, e);
+ /* if common prefix was found */
+ if (f > s) {
+ /* ignore element if fulfilled or ambigous */
+ if (f == e) {
+ continue;
+ }
+ /* if shortest key was found with the same value,
+ * just replace its current key with longest key */
+ if ( foundItem->value == i
+ && foundItem->length < lwrv[i]->length
+ && foundItem->childTree.firstPtr == NULL
+ ) {
+ Tcl_SetObjRef(foundItem->key, lwrv[i]);
+ foundItem->length = lwrv[i]->length;
+ continue;
+ }
+ /* split tree (e. g. j->(jan,jun) + jul == j->(jan,ju->(jun,jul)) )
+ * but don't split by fulfilled child of found item ( ii->iii->iiii ) */
+ if (foundItem->length != (f - s)) {
+ /* first split found item (insert one between parent and found + new one) */
+ item = ckalloc(sizeof(*item));
+ if (item == NULL) {
+ goto done;
+ }
+ Tcl_InitObjRef(item->key, foundItem->key);
+ item->length = f - s;
+ /* set value or mark as ambigous if not the same value of both */
+ item->value = (foundItem->value == i) ? i : -1;
+ /* insert group item between foundParent and foundItem */
+ TclStrIdxTreeInsertBranch(foundParent, item, foundItem);
+ foundParent = &item->childTree;
+ } else {
+ /* the new item should be added as child of found item */
+ foundParent = &foundItem->childTree;
+ }
+ }
+ }
+ /* append item at end of found parent */
+ item = ckalloc(sizeof(*item));
+ if (item == NULL) {
+ goto done;
+ }
+ item->childTree.lastPtr = item->childTree.firstPtr = NULL;
+ Tcl_InitObjRef(item->key, lwrv[i]);
+ item->length = lwrv[i]->length;
+ item->value = i;
+ TclStrIdxTreeAppend(foundParent, item);
+ };
+
+ ret = TCL_OK;
+
+done:
+
+ if (lwrv != NULL) {
+ for (i = 0; i < lstc; i++) {
+ Tcl_DecrRefCount(lwrv[i]);
+ }
+ ckfree(lwrv);
+ }
+
+ if (ret != TCL_OK) {
+ if (idxTree->firstPtr != NULL) {
+ TclStrIdxTreeFree(idxTree->firstPtr);
+ }
+ }
+
+ return ret;
+}
+
+
+static void
+StrIdxTreeObj_DupIntRepProc(Tcl_Obj *srcPtr, Tcl_Obj *copyPtr);
+static void
+StrIdxTreeObj_FreeIntRepProc(Tcl_Obj *objPtr);
+static void
+StrIdxTreeObj_UpdateStringProc(Tcl_Obj *objPtr);
+
+Tcl_ObjType StrIdxTreeObjType = {
+ "str-idx-tree", /* name */
+ StrIdxTreeObj_FreeIntRepProc, /* freeIntRepProc */
+ StrIdxTreeObj_DupIntRepProc, /* dupIntRepProc */
+ StrIdxTreeObj_UpdateStringProc, /* updateStringProc */
+ NULL /* setFromAnyProc */
+};
+
+MODULE_SCOPE Tcl_Obj*
+TclStrIdxTreeNewObj()
+{
+ Tcl_Obj *objPtr = Tcl_NewObj();
+ objPtr->internalRep.twoPtrValue.ptr1 = NULL;
+ objPtr->internalRep.twoPtrValue.ptr2 = NULL;
+ objPtr->typePtr = &StrIdxTreeObjType;
+ /* return tree root in internal representation */
+ return objPtr;
+}
+
+static void
+StrIdxTreeObj_DupIntRepProc(Tcl_Obj *srcPtr, Tcl_Obj *copyPtr)
+{
+ /* follow links (smart pointers) */
+ if ( srcPtr->internalRep.twoPtrValue.ptr1 != NULL
+ && srcPtr->internalRep.twoPtrValue.ptr2 == NULL
+ ) {
+ srcPtr = (Tcl_Obj*)srcPtr->internalRep.twoPtrValue.ptr1;
+ }
+ /* create smart pointer to it (ptr1 != NULL, ptr2 = NULL) */
+ Tcl_InitObjRef(((Tcl_Obj *)copyPtr->internalRep.twoPtrValue.ptr1),
+ srcPtr);
+ copyPtr->internalRep.twoPtrValue.ptr2 = NULL;
+ copyPtr->typePtr = &StrIdxTreeObjType;
+}
+
+static void
+StrIdxTreeObj_FreeIntRepProc(Tcl_Obj *objPtr)
+{
+ /* follow links (smart pointers) */
+ if ( objPtr->internalRep.twoPtrValue.ptr1 != NULL
+ && objPtr->internalRep.twoPtrValue.ptr2 == NULL
+ ) {
+ /* is a link */
+ Tcl_UnsetObjRef(((Tcl_Obj *)objPtr->internalRep.twoPtrValue.ptr1));
+ } else {
+ /* is a tree */
+ TclStrIdxTree *tree = (TclStrIdxTree*)&objPtr->internalRep.twoPtrValue.ptr1;
+ if (tree->firstPtr != NULL) {
+ TclStrIdxTreeFree(tree->firstPtr);
+ }
+ objPtr->internalRep.twoPtrValue.ptr1 = NULL;
+ objPtr->internalRep.twoPtrValue.ptr2 = NULL;
+ }
+ objPtr->typePtr = NULL;
+};
+
+static void
+StrIdxTreeObj_UpdateStringProc(Tcl_Obj *objPtr)
+{
+ /* currently only dummy empty string possible */
+ objPtr->length = 0;
+ objPtr->bytes = tclEmptyStringRep;
+};
+
+MODULE_SCOPE TclStrIdxTree *
+TclStrIdxTreeGetFromObj(Tcl_Obj *objPtr) {
+ /* follow links (smart pointers) */
+ if (objPtr->typePtr != &StrIdxTreeObjType) {
+ return NULL;
+ }
+ if ( objPtr->internalRep.twoPtrValue.ptr1 != NULL
+ && objPtr->internalRep.twoPtrValue.ptr2 == NULL
+ ) {
+ objPtr = (Tcl_Obj*)objPtr->internalRep.twoPtrValue.ptr1;
+ }
+ /* return tree root in internal representation */
+ return (TclStrIdxTree*)&objPtr->internalRep.twoPtrValue.ptr1;
+}
+
+/*
+ * Several debug primitives
+ */
+#if 1
+
+void
+TclStrIdxTreePrint(
+ Tcl_Interp *interp,
+ TclStrIdx *tree,
+ int offs)
+{
+ Tcl_Obj *obj[2];
+ const char *s;
+ Tcl_InitObjRef(obj[0], Tcl_NewStringObj("::puts", -1));
+ while (tree != NULL) {
+ s = TclGetString(tree->key) + offs;
+ Tcl_InitObjRef(obj[1], Tcl_ObjPrintf("%*s%.*s\t:%d",
+ offs, "", tree->length - offs, s, tree->value));
+ Tcl_PutsObjCmd(NULL, interp, 2, obj);
+ Tcl_UnsetObjRef(obj[1]);
+ if (tree->childTree.firstPtr != NULL) {
+ TclStrIdxTreePrint(interp, tree->childTree.firstPtr, tree->length);
+ }
+ tree = tree->nextPtr;
+ }
+ Tcl_UnsetObjRef(obj[0]);
+}
+
+
+MODULE_SCOPE int
+TclStrIdxTreeTestObjCmd(
+ ClientData clientData, Tcl_Interp *interp,
+ int objc, Tcl_Obj *const objv[])
+{
+ const char *cs, *cin, *ret;
+
+ static const char *const options[] = {
+ "index", "puts-index", "findequal",
+ NULL
+ };
+ enum optionInd {
+ O_INDEX, O_PUTS_INDEX, O_FINDEQUAL
+ };
+ int optionIndex;
+
+ if (objc < 2) {
+ Tcl_SetResult(interp, "wrong # args", TCL_STATIC);
+ return TCL_ERROR;
+ }
+ if (Tcl_GetIndexFromObj(interp, objv[1], options,
+ "option", 0, &optionIndex) != TCL_OK) {
+ Tcl_SetErrorCode(interp, "CLOCK", "badOption",
+ Tcl_GetString(objv[1]), NULL);
+ return TCL_ERROR;
+ }
+ switch (optionIndex) {
+ case O_FINDEQUAL:
+ if (objc < 4) {
+ Tcl_SetResult(interp, "wrong # args", TCL_STATIC);
+ return TCL_ERROR;
+ }
+ cs = TclGetString(objv[2]);
+ cin = TclGetString(objv[3]);
+ ret = TclUtfFindEqual(
+ cs, cs + objv[1]->length, cin, cin + objv[2]->length);
+ Tcl_SetObjResult(interp, Tcl_NewIntObj(ret - cs));
+ break;
+ case O_INDEX:
+ case O_PUTS_INDEX:
+
+ if (1) {
+ Tcl_Obj **lstv;
+ int i, lstc;
+ TclStrIdxTree idxTree = {NULL, NULL};
+ i = 1;
+ while (++i < objc) {
+ if (TclListObjGetElements(interp, objv[i],
+ &lstc, &lstv) != TCL_OK) {
+ return TCL_ERROR;
+ };
+ TclStrIdxTreeBuildFromList(&idxTree, lstc, lstv);
+ }
+ if (optionIndex == O_PUTS_INDEX) {
+ TclStrIdxTreePrint(interp, idxTree.firstPtr, 0);
+ }
+ TclStrIdxTreeFree(idxTree.firstPtr);
+ }
+ break;
+ }
+
+ return TCL_OK;
+}
+
+#endif
+
+/*
+ * Local Variables:
+ * mode: c
+ * c-basic-offset: 4
+ * fill-column: 78
+ * End:
+ */
diff --git a/generic/tclStrIdxTree.h b/generic/tclStrIdxTree.h
new file mode 100644
index 0000000..e80d3db
--- /dev/null
+++ b/generic/tclStrIdxTree.h
@@ -0,0 +1,134 @@
+/*
+ * tclStrIdxTree.h --
+ *
+ * Declarations of string index tries and other primitives currently
+ * back-ported from tclSE.
+ *
+ * Copyright (c) 2016 Serg G. Brester (aka sebres)
+ *
+ * See the file "license.terms" for information on usage and redistribution
+ * of this file, and for a DISCLAIMER OF ALL WARRANTIES.
+ */
+
+#ifndef _TCLSTRIDXTREE_H
+#define _TCLSTRIDXTREE_H
+
+
+/*
+ * Main structures declarations of index tree and entry
+ */
+
+typedef struct TclStrIdxTree {
+ struct TclStrIdx *firstPtr;
+ struct TclStrIdx *lastPtr;
+} TclStrIdxTree;
+
+typedef struct TclStrIdx {
+ struct TclStrIdxTree childTree;
+ struct TclStrIdx *nextPtr;
+ struct TclStrIdx *prevPtr;
+ Tcl_Obj *key;
+ int length;
+ int value;
+} TclStrIdx;
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * TclUtfFindEqual, TclUtfFindEqualNC --
+ *
+ * Find largest part of string cs in string cin (case sensitive and not).
+ *
+ * Results:
+ * Return position of UTF character in cs after last equal character.
+ *
+ * Side effects:
+ * None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+inline const char *
+TclUtfFindEqual(
+ register const char *cs, /* UTF string to find in cin. */
+ register const char *cse, /* End of cs */
+ register const char *cin, /* UTF string will be browsed. */
+ register const char *cine) /* End of cin */
+{
+ register const char *ret = cs;
+ Tcl_UniChar ch1, ch2;
+ do {
+ cs += TclUtfToUniChar(cs, &ch1);
+ cin += TclUtfToUniChar(cin, &ch2);
+ if (ch1 != ch2) break;
+ } while ((ret = cs) < cse && cin < cine);
+ return ret;
+}
+
+inline const char *
+TclUtfFindEqualNC(
+ register const char *cs, /* UTF string to find in cin. */
+ register const char *cse, /* End of cs */
+ register const char *cin, /* UTF string will be browsed. */
+ register const char *cine, /* End of cin */
+ const char **cinfnd) /* Return position in cin */
+{
+ register const char *ret = cs;
+ Tcl_UniChar ch1, ch2;
+ do {
+ cs += TclUtfToUniChar(cs, &ch1);
+ cin += TclUtfToUniChar(cin, &ch2);
+ if (ch1 != ch2) {
+ ch1 = Tcl_UniCharToLower(ch1);
+ ch2 = Tcl_UniCharToLower(ch2);
+ if (ch1 != ch2) break;
+ }
+ *cinfnd = cin;
+ } while ((ret = cs) < cse && cin < cine);
+ return ret;
+}
+
+/*
+ * Primitives to safe set, reset and free references.
+ */
+
+#define Tcl_UnsetObjRef(obj) \
+ if (obj != NULL) { Tcl_DecrRefCount(obj); obj = NULL; }
+#define Tcl_InitObjRef(obj, val) \
+ obj = val; if (obj) { Tcl_IncrRefCount(obj); }
+#define Tcl_SetObjRef(obj, val) \
+if (1) { \
+ Tcl_Obj *nval = val; \
+ if (obj != nval) { \
+ Tcl_Obj *prev = obj; \
+ Tcl_InitObjRef(obj, nval); \
+ if (prev != NULL) { Tcl_DecrRefCount(prev); }; \
+ } \
+}
+
+/*
+ * Prototypes of module functions.
+ */
+
+MODULE_SCOPE const char*
+ TclStrIdxTreeSearch(TclStrIdxTree **foundParent,
+ TclStrIdx **foundItem, TclStrIdxTree *tree,
+ const char *start, const char *end);
+
+MODULE_SCOPE int TclStrIdxTreeBuildFromList(TclStrIdxTree *idxTree,
+ int lstc, Tcl_Obj **lstv);
+
+MODULE_SCOPE Tcl_Obj*
+ TclStrIdxTreeNewObj();
+
+MODULE_SCOPE TclStrIdxTree*
+ TclStrIdxTreeGetFromObj(Tcl_Obj *objPtr);
+
+#if 1
+
+MODULE_SCOPE int TclStrIdxTreeTestObjCmd(ClientData, Tcl_Interp *,
+ int, Tcl_Obj *const objv[]);
+#endif
+
+#endif /* _TCLSTRIDXTREE_H */
diff --git a/unix/Makefile.in b/unix/Makefile.in
index b220139..19ab6ec 100644
--- a/unix/Makefile.in
+++ b/unix/Makefile.in
@@ -451,6 +451,7 @@ GENERIC_SRCS = \
$(GENERIC_DIR)/tclScan.c \
$(GENERIC_DIR)/tclStubInit.c \
$(GENERIC_DIR)/tclStringObj.c \
+ $(GENERIC_DIR)/tclStrIdxTree.c \
$(GENERIC_DIR)/tclStrToD.c \
$(GENERIC_DIR)/tclTest.c \
$(GENERIC_DIR)/tclTestObj.c \
@@ -1305,6 +1306,9 @@ tclScan.o: $(GENERIC_DIR)/tclScan.c
tclStringObj.o: $(GENERIC_DIR)/tclStringObj.c $(MATHHDRS)
$(CC) -c $(CC_SWITCHES) $(GENERIC_DIR)/tclStringObj.c
+tclStrIdxTree.o: $(GENERIC_DIR)/tclStrIdxTree.c $(MATHHDRS)
+ $(CC) -c $(CC_SWITCHES) $(GENERIC_DIR)/tclStrIdxTree.c
+
tclStrToD.o: $(GENERIC_DIR)/tclStrToD.c $(MATHHDRS)
$(CC) -c $(CC_SWITCHES) $(GENERIC_DIR)/tclStrToD.c
diff --git a/win/Makefile.in b/win/Makefile.in
index 82e5516..478bbb9 100644
--- a/win/Makefile.in
+++ b/win/Makefile.in
@@ -291,6 +291,7 @@ GENERIC_OBJS = \
tclResult.$(OBJEXT) \
tclScan.$(OBJEXT) \
tclStringObj.$(OBJEXT) \
+ tclStrIdxTree.$(OBJEXT) \
tclStrToD.$(OBJEXT) \
tclStubInit.$(OBJEXT) \
tclThread.$(OBJEXT) \
diff --git a/win/makefile.vc b/win/makefile.vc
index d6dbf85..48bacbc 100644
--- a/win/makefile.vc
+++ b/win/makefile.vc
@@ -333,6 +333,7 @@ COREOBJS = \
$(TMP_DIR)\tclResult.obj \
$(TMP_DIR)\tclScan.obj \
$(TMP_DIR)\tclStringObj.obj \
+ $(TMP_DIR)\tclStrIdxTree.obj \
$(TMP_DIR)\tclStrToD.obj \
$(TMP_DIR)\tclStubInit.obj \
$(TMP_DIR)\tclThread.obj \