diff options
Diffstat (limited to 'generic/tclStrIdxTree.c')
-rw-r--r-- | generic/tclStrIdxTree.c | 527 |
1 files changed, 0 insertions, 527 deletions
diff --git a/generic/tclStrIdxTree.c b/generic/tclStrIdxTree.c deleted file mode 100644 index 557d575..0000000 --- a/generic/tclStrIdxTree.c +++ /dev/null @@ -1,527 +0,0 @@ -/* - * tclStrIdxTree.c -- - * - * Contains the routines for managing string index tries in Tcl. - * - * This code is back-ported from the tclSE engine, by Serg G. Brester. - * - * Copyright (c) 2016 by Sergey G. Brester aka sebres. All rights reserved. - * - * See the file "license.terms" for information on usage and redistribution of - * this file, and for a DISCLAIMER OF ALL WARRANTIES. - * - * ----------------------------------------------------------------------- - * - * String index tries are prepaired structures used for fast greedy search of the string - * (index) by unique string prefix as key. - * - * Index tree build for two lists together can be explained in the following datagram - * - * Lists: - * - * {Januar Februar Maerz April Mai Juni Juli August September Oktober November Dezember} - * {Jnr Fbr Mrz Apr Mai Jni Jli Agt Spt Okt Nvb Dzb} - * - * Index-Tree: - * - * j 0 * ... - * anuar 1 * - * u 0 * a 0 - * ni 6 * pril 4 - * li 7 * ugust 8 - * n 0 * gt 8 - * r 1 * s 9 - * i 6 * eptember 9 - * li 7 * pt 9 - * f 2 * oktober 10 - * ebruar 2 * n 11 - * br 2 * ovember 11 - * m 0 * vb 11 - * a 0 * d 12 - * erz 3 * ezember 12 - * i 5 * zb 12 - * rz 3 * - * ... - * - * Thereby value 0 shows pure group items (corresponding ambigous matches). - * But the group may have a value if it contains only same values - * (see for example group "f" above). - * - * StrIdxTree's are very fast, so: - * build of above-mentioned tree takes about 10 microseconds. - * search of string index in this tree takes fewer as 0.1 microseconds. - * - */ - -#include "tclInt.h" -#include "tclStrIdxTree.h" - - -/* - *---------------------------------------------------------------------- - * - * TclStrIdxTreeSearch -- - * - * Find largest part of string "start" in indexed tree (case sensitive). - * - * Also used for building of string index tree. - * - * Results: - * Return position of UTF character in start after last equal character - * and found item (with parent). - * - * Side effects: - * None. - * - *---------------------------------------------------------------------- - */ - -MODULE_SCOPE const char* -TclStrIdxTreeSearch( - TclStrIdxTree **foundParent, /* Return value of found sub tree (used for tree build) */ - TclStrIdx **foundItem, /* Return value of found item */ - TclStrIdxTree *tree, /* Index tree will be browsed */ - const char *start, /* UTF string to find in tree */ - const char *end) /* End of string */ -{ - TclStrIdxTree *parent = tree, *prevParent = tree; - TclStrIdx *item = tree->firstPtr, *prevItem = NULL; - const char *s = start, *f, *cin, *cinf, *prevf; - int offs = 0; - - if (item == NULL) { - goto done; - } - - /* search in tree */ - do { - cinf = cin = TclGetString(item->key) + offs; - f = TclUtfFindEqualNCInLwr(s, end, cin, cin + item->length, &cinf); - /* if something was found */ - if (f > s) { - /* if whole string was found */ - if (f >= end) { - start = f; - goto done; - }; - /* set new offset and shift start string */ - offs += cinf - cin; - s = f; - /* if match item, go deeper as long as possible */ - if (offs >= item->length && item->childTree.firstPtr) { - /* save previuosly found item (if not ambigous) for - * possible fallback (few greedy match) */ - if (item->value != NULL) { - prevf = f; - prevItem = item; - prevParent = parent; - } - parent = &item->childTree; - item = item->childTree.firstPtr; - continue; - } - /* no children - return this item and current chars found */ - start = f; - goto done; - } - - item = item->nextPtr; - - } while (item != NULL); - - /* fallback (few greedy match) not ambigous (has a value) */ - if (prevItem != NULL) { - item = prevItem; - parent = prevParent; - start = prevf; - } - -done: - - if (foundParent) - *foundParent = parent; - if (foundItem) - *foundItem = item; - return start; -} - -MODULE_SCOPE void -TclStrIdxTreeFree( - TclStrIdx *tree) -{ - while (tree != NULL) { - TclStrIdx *t; - Tcl_DecrRefCount(tree->key); - if (tree->childTree.firstPtr != NULL) { - TclStrIdxTreeFree(tree->childTree.firstPtr); - } - t = tree, tree = tree->nextPtr; - ckfree(t); - } -} - -/* - * Several bidirectional list primitives - */ -inline void -TclStrIdxTreeInsertBranch( - TclStrIdxTree *parent, - register TclStrIdx *item, - register TclStrIdx *child) -{ - if (parent->firstPtr == child) - parent->firstPtr = item; - if (parent->lastPtr == child) - parent->lastPtr = item; - if ( (item->nextPtr = child->nextPtr) ) { - item->nextPtr->prevPtr = item; - child->nextPtr = NULL; - } - if ( (item->prevPtr = child->prevPtr) ) { - item->prevPtr->nextPtr = item; - child->prevPtr = NULL; - } - item->childTree.firstPtr = child; - item->childTree.lastPtr = child; -} - -inline void -TclStrIdxTreeAppend( - register TclStrIdxTree *parent, - register TclStrIdx *item) -{ - if (parent->lastPtr != NULL) { - parent->lastPtr->nextPtr = item; - } - item->prevPtr = parent->lastPtr; - item->nextPtr = NULL; - parent->lastPtr = item; - if (parent->firstPtr == NULL) { - parent->firstPtr = item; - } -} - - -/* - *---------------------------------------------------------------------- - * - * TclStrIdxTreeBuildFromList -- - * - * Build or extend string indexed tree from tcl list. - * If the values not given the values of built list are indices starts with 1. - * Value of 0 is thereby reserved to the ambigous values. - * - * Important: by multiple lists, optimal tree can be created only if list with - * larger strings used firstly. - * - * Results: - * Returns a standard Tcl result. - * - * Side effects: - * None. - * - *---------------------------------------------------------------------- - */ - -MODULE_SCOPE int -TclStrIdxTreeBuildFromList( - TclStrIdxTree *idxTree, - int lstc, - Tcl_Obj **lstv, - ClientData *values) -{ - Tcl_Obj **lwrv; - int i, ret = TCL_ERROR; - ClientData val; - const char *s, *e, *f; - TclStrIdx *item; - - /* create lowercase reflection of the list keys */ - - lwrv = ckalloc(sizeof(Tcl_Obj*) * lstc); - if (lwrv == NULL) { - return TCL_ERROR; - } - for (i = 0; i < lstc; i++) { - lwrv[i] = Tcl_DuplicateObj(lstv[i]); - if (lwrv[i] == NULL) { - return TCL_ERROR; - } - Tcl_IncrRefCount(lwrv[i]); - lwrv[i]->length = Tcl_UtfToLower(TclGetString(lwrv[i])); - } - - /* build index tree of the list keys */ - for (i = 0; i < lstc; i++) { - TclStrIdxTree *foundParent = idxTree; - e = s = TclGetString(lwrv[i]); - e += lwrv[i]->length; - val = values ? values[i] : INT2PTR(i+1); - - /* ignore empty keys (impossible to index it) */ - if (lwrv[i]->length == 0) continue; - - item = NULL; - if (idxTree->firstPtr != NULL) { - TclStrIdx *foundItem; - f = TclStrIdxTreeSearch(&foundParent, &foundItem, - idxTree, s, e); - /* if common prefix was found */ - if (f > s) { - /* ignore element if fulfilled or ambigous */ - if (f == e) { - continue; - } - /* if shortest key was found with the same value, - * just replace its current key with longest key */ - if ( foundItem->value == val - && foundItem->length < lwrv[i]->length - && foundItem->childTree.firstPtr == NULL - ) { - Tcl_SetObjRef(foundItem->key, lwrv[i]); - foundItem->length = lwrv[i]->length; - continue; - } - /* split tree (e. g. j->(jan,jun) + jul == j->(jan,ju->(jun,jul)) ) - * but don't split by fulfilled child of found item ( ii->iii->iiii ) */ - if (foundItem->length != (f - s)) { - /* first split found item (insert one between parent and found + new one) */ - item = ckalloc(sizeof(*item)); - if (item == NULL) { - goto done; - } - Tcl_InitObjRef(item->key, foundItem->key); - item->length = f - s; - /* set value or mark as ambigous if not the same value of both */ - item->value = (foundItem->value == val) ? val : NULL; - /* insert group item between foundParent and foundItem */ - TclStrIdxTreeInsertBranch(foundParent, item, foundItem); - foundParent = &item->childTree; - } else { - /* the new item should be added as child of found item */ - foundParent = &foundItem->childTree; - } - } - } - /* append item at end of found parent */ - item = ckalloc(sizeof(*item)); - if (item == NULL) { - goto done; - } - item->childTree.lastPtr = item->childTree.firstPtr = NULL; - Tcl_InitObjRef(item->key, lwrv[i]); - item->length = lwrv[i]->length; - item->value = val; - TclStrIdxTreeAppend(foundParent, item); - }; - - ret = TCL_OK; - -done: - - if (lwrv != NULL) { - for (i = 0; i < lstc; i++) { - Tcl_DecrRefCount(lwrv[i]); - } - ckfree(lwrv); - } - - if (ret != TCL_OK) { - if (idxTree->firstPtr != NULL) { - TclStrIdxTreeFree(idxTree->firstPtr); - } - } - - return ret; -} - - -static void -StrIdxTreeObj_DupIntRepProc(Tcl_Obj *srcPtr, Tcl_Obj *copyPtr); -static void -StrIdxTreeObj_FreeIntRepProc(Tcl_Obj *objPtr); -static void -StrIdxTreeObj_UpdateStringProc(Tcl_Obj *objPtr); - -Tcl_ObjType StrIdxTreeObjType = { - "str-idx-tree", /* name */ - StrIdxTreeObj_FreeIntRepProc, /* freeIntRepProc */ - StrIdxTreeObj_DupIntRepProc, /* dupIntRepProc */ - StrIdxTreeObj_UpdateStringProc, /* updateStringProc */ - NULL /* setFromAnyProc */ -}; - -MODULE_SCOPE Tcl_Obj* -TclStrIdxTreeNewObj() -{ - Tcl_Obj *objPtr = Tcl_NewObj(); - objPtr->internalRep.twoPtrValue.ptr1 = NULL; - objPtr->internalRep.twoPtrValue.ptr2 = NULL; - objPtr->typePtr = &StrIdxTreeObjType; - /* return tree root in internal representation */ - return objPtr; -} - -static void -StrIdxTreeObj_DupIntRepProc(Tcl_Obj *srcPtr, Tcl_Obj *copyPtr) -{ - /* follow links (smart pointers) */ - if ( srcPtr->internalRep.twoPtrValue.ptr1 != NULL - && srcPtr->internalRep.twoPtrValue.ptr2 == NULL - ) { - srcPtr = (Tcl_Obj*)srcPtr->internalRep.twoPtrValue.ptr1; - } - /* create smart pointer to it (ptr1 != NULL, ptr2 = NULL) */ - Tcl_InitObjRef(*((Tcl_Obj **)©Ptr->internalRep.twoPtrValue.ptr1), - srcPtr); - copyPtr->internalRep.twoPtrValue.ptr2 = NULL; - copyPtr->typePtr = &StrIdxTreeObjType; -} - -static void -StrIdxTreeObj_FreeIntRepProc(Tcl_Obj *objPtr) -{ - /* follow links (smart pointers) */ - if ( objPtr->internalRep.twoPtrValue.ptr1 != NULL - && objPtr->internalRep.twoPtrValue.ptr2 == NULL - ) { - /* is a link */ - Tcl_UnsetObjRef(*((Tcl_Obj **)&objPtr->internalRep.twoPtrValue.ptr1)); - } else { - /* is a tree */ - TclStrIdxTree *tree = (TclStrIdxTree*)&objPtr->internalRep.twoPtrValue.ptr1; - if (tree->firstPtr != NULL) { - TclStrIdxTreeFree(tree->firstPtr); - } - objPtr->internalRep.twoPtrValue.ptr1 = NULL; - objPtr->internalRep.twoPtrValue.ptr2 = NULL; - } - objPtr->typePtr = NULL; -}; - -static void -StrIdxTreeObj_UpdateStringProc(Tcl_Obj *objPtr) -{ - /* currently only dummy empty string possible */ - objPtr->length = 0; - objPtr->bytes = &tclEmptyString; -}; - -MODULE_SCOPE TclStrIdxTree * -TclStrIdxTreeGetFromObj(Tcl_Obj *objPtr) { - /* follow links (smart pointers) */ - if (objPtr->typePtr != &StrIdxTreeObjType) { - return NULL; - } - if ( objPtr->internalRep.twoPtrValue.ptr1 != NULL - && objPtr->internalRep.twoPtrValue.ptr2 == NULL - ) { - objPtr = (Tcl_Obj*)objPtr->internalRep.twoPtrValue.ptr1; - } - /* return tree root in internal representation */ - return (TclStrIdxTree*)&objPtr->internalRep.twoPtrValue.ptr1; -} - -/* - * Several debug primitives - */ -#if 0 -/* currently unused, debug resp. test purposes only */ - -void -TclStrIdxTreePrint( - Tcl_Interp *interp, - TclStrIdx *tree, - int offs) -{ - Tcl_Obj *obj[2]; - const char *s; - Tcl_InitObjRef(obj[0], Tcl_NewStringObj("::puts", -1)); - while (tree != NULL) { - s = TclGetString(tree->key) + offs; - Tcl_InitObjRef(obj[1], Tcl_ObjPrintf("%*s%.*s\t:%d", - offs, "", tree->length - offs, s, tree->value)); - Tcl_PutsObjCmd(NULL, interp, 2, obj); - Tcl_UnsetObjRef(obj[1]); - if (tree->childTree.firstPtr != NULL) { - TclStrIdxTreePrint(interp, tree->childTree.firstPtr, tree->length); - } - tree = tree->nextPtr; - } - Tcl_UnsetObjRef(obj[0]); -} - - -MODULE_SCOPE int -TclStrIdxTreeTestObjCmd( - ClientData clientData, Tcl_Interp *interp, - int objc, Tcl_Obj *const objv[]) -{ - const char *cs, *cin, *ret; - - static const char *const options[] = { - "index", "puts-index", "findequal", - NULL - }; - enum optionInd { - O_INDEX, O_PUTS_INDEX, O_FINDEQUAL - }; - int optionIndex; - - if (objc < 2) { - Tcl_WrongNumArgs(interp, 1, objv, ""); - return TCL_ERROR; - } - if (Tcl_GetIndexFromObj(interp, objv[1], options, - "option", 0, &optionIndex) != TCL_OK) { - Tcl_SetErrorCode(interp, "CLOCK", "badOption", - Tcl_GetString(objv[1]), NULL); - return TCL_ERROR; - } - switch (optionIndex) { - case O_FINDEQUAL: - if (objc < 4) { - Tcl_WrongNumArgs(interp, 1, objv, ""); - return TCL_ERROR; - } - cs = TclGetString(objv[2]); - cin = TclGetString(objv[3]); - ret = TclUtfFindEqual( - cs, cs + objv[1]->length, cin, cin + objv[2]->length); - Tcl_SetObjResult(interp, Tcl_NewIntObj(ret - cs)); - break; - case O_INDEX: - case O_PUTS_INDEX: - - if (1) { - Tcl_Obj **lstv; - int i, lstc; - TclStrIdxTree idxTree = {NULL, NULL}; - i = 1; - while (++i < objc) { - if (TclListObjGetElements(interp, objv[i], - &lstc, &lstv) != TCL_OK) { - return TCL_ERROR; - }; - TclStrIdxTreeBuildFromList(&idxTree, lstc, lstv, NULL); - } - if (optionIndex == O_PUTS_INDEX) { - TclStrIdxTreePrint(interp, idxTree.firstPtr, 0); - } - TclStrIdxTreeFree(idxTree.firstPtr); - } - break; - } - - return TCL_OK; -} - -#endif - -/* - * Local Variables: - * mode: c - * c-basic-offset: 4 - * fill-column: 78 - * End: - */ |