From 26a9d6154a6bfc83698a279baa259415aac8fab2 Mon Sep 17 00:00:00 2001 From: ericm Date: Sat, 24 Jun 2000 00:26:08 +0000 Subject: * doc/Hash.3: Added documentation patch for Tcl_Obj *'s as keys in Tcl hash tables [RFE: 5934]. * generic/tcl.h: * generic/tclHash.c: Applied patch from [RFE: 5934], which extends Tcl hash tables to allow Tcl_Obj *'s as the key. --- ChangeLog | 9 ++ doc/Hash.3 | 13 ++- generic/tcl.h | 18 ++-- generic/tclHash.c | 251 +++++++++++++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 278 insertions(+), 13 deletions(-) diff --git a/ChangeLog b/ChangeLog index 297d204..311c696 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,12 @@ +2000-06-23 Eric Melski + + * doc/Hash.3: Added documentation patch for Tcl_Obj *'s as keys in + Tcl hash tables [RFE: 5934]. + + * generic/tcl.h: + * generic/tclHash.c: Applied patch from [RFE: 5934], which extends + Tcl hash tables to allow Tcl_Obj *'s as the key. + 2000-06-20 Eric Melski * library/opt0.4/optparse.tcl: Applied patch from [Bug: 5922], which diff --git a/doc/Hash.3 b/doc/Hash.3 index 72af625..d15b464 100644 --- a/doc/Hash.3 +++ b/doc/Hash.3 @@ -5,7 +5,7 @@ '\" See the file "license.terms" for information on usage and redistribution '\" of this file, and for a DISCLAIMER OF ALL WARRANTIES. '\" -'\" RCS: @(#) $Id: Hash.3,v 1.3 1999/12/21 23:57:33 hobbs Exp $ +'\" RCS: @(#) $Id: Hash.3,v 1.4 2000/06/24 00:26:08 ericm Exp $ '\" .so man.macros .TH Tcl_Hash 3 "" Tcl "Tcl Library Procedures" @@ -52,7 +52,7 @@ Address of hash table structure (for all procedures but previous call to \fBTcl_InitHashTable\fR). .AP int keyType in Kind of keys to use for new hash table. Must be either -TCL_STRING_KEYS, TCL_ONE_WORD_KEYS, or an integer value +TCL_STRING_KEYS, TCL_OBJ_KEYS, TCL_ONE_WORD_KEYS, or an integer value greater than 1. .AP char *key in Key to use for probe into table. Exact form depends on @@ -107,13 +107,20 @@ one of the following values: Keys are null-terminated ASCII strings. They are passed to hashing routines using the address of the first character of the string. +.IP \fBTCL_OBJ_KEYS\fR 25 +Keys are Tcl_Obj *. Hashing and comparison are done on the string +representation of the object. The differences between this type and +TCL_STRING_KEYS are: the key is not copied, instead the reference +count is incremented; and the extra information associated with the +Tcl_Obj * is used to optimize comparisons. The string is only +compared if the two Tcl_Obj * are different and have the same length. .IP \fBTCL_ONE_WORD_KEYS\fR 25 Keys are single-word values; they are passed to hashing routines and stored in hash table entries as ``char *'' values. The pointer value is the key; it need not (and usually doesn't) actually point to a string. .IP \fIother\fR 25 -If \fIkeyType\fR is not TCL_STRING_KEYS or TCL_ONE_WORD_KEYS, +If \fIkeyType\fR is not one of the above, then it must be an integer value greater than 1. In this case the keys will be arrays of ``int'' values, where \fIkeyType\fR gives the number of ints in each key. diff --git a/generic/tcl.h b/generic/tcl.h index cae5dde..ed6c2ac 100644 --- a/generic/tcl.h +++ b/generic/tcl.h @@ -12,7 +12,7 @@ * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tcl.h,v 1.71 2000/05/03 00:15:06 hobbs Exp $ + * RCS: @(#) $Id: tcl.h,v 1.72 2000/06/24 00:26:08 ericm Exp $ */ #ifndef _TCL @@ -969,6 +969,7 @@ typedef struct Tcl_HashEntry { * with Tcl_SetHashValue. */ union { /* Key has one of these forms: */ char *oneWordValue; /* One-word value for key. */ + Tcl_Obj *objPtr; /* Tcl_Obj * key value. */ int words[1]; /* Multiple integer words for key. * The actual size will be as large * as necessary for this table's @@ -1005,10 +1006,10 @@ typedef struct Tcl_HashTable { int mask; /* Mask value used in hashing * function. */ int keyType; /* Type of keys used in this table. - * It's either TCL_STRING_KEYS, - * TCL_ONE_WORD_KEYS, or an integer - * giving the number of ints that - * is the size of the key. + * It's either TCL_OBJ_KEYS, + * TCL_STRING_KEYS, TCL_ONE_WORD_KEYS, + * or an integer giving the number of + * ints that is the size of the key. */ Tcl_HashEntry *(*findProc) _ANSI_ARGS_((struct Tcl_HashTable *tablePtr, CONST char *key)); @@ -1033,6 +1034,7 @@ typedef struct Tcl_HashSearch { * Acceptable key types for hash tables: */ +#define TCL_OBJ_KEYS -1 #define TCL_STRING_KEYS 0 #define TCL_ONE_WORD_KEYS 1 @@ -1043,8 +1045,10 @@ typedef struct Tcl_HashSearch { #define Tcl_GetHashValue(h) ((h)->clientData) #define Tcl_SetHashValue(h, value) ((h)->clientData = (ClientData) (value)) #define Tcl_GetHashKey(tablePtr, h) \ - ((char *) (((tablePtr)->keyType == TCL_ONE_WORD_KEYS) ? (h)->key.oneWordValue \ - : (h)->key.string)) + ((char *) (((tablePtr)->keyType == TCL_ONE_WORD_KEYS || \ + (tablePtr)->keyType == TCL_OBJ_KEYS) \ + ? (h)->key.oneWordValue \ + : (h)->key.string)) /* * Macros to use for clients to use to invoke find and create procedures diff --git a/generic/tclHash.c b/generic/tclHash.c index 973c003..374f0e5 100644 --- a/generic/tclHash.c +++ b/generic/tclHash.c @@ -10,7 +10,7 @@ * See the file "license.terms" for information on usage and redistribution * of this file, and for a DISCLAIMER OF ALL WARRANTIES. * - * RCS: @(#) $Id: tclHash.c,v 1.3 1999/04/16 00:46:46 stanton Exp $ + * RCS: @(#) $Id: tclHash.c,v 1.4 2000/06/24 00:26:09 ericm Exp $ */ #include "tclInt.h" @@ -56,6 +56,11 @@ static Tcl_HashEntry * OneWordFind _ANSI_ARGS_((Tcl_HashTable *tablePtr, CONST char *key)); static Tcl_HashEntry * OneWordCreate _ANSI_ARGS_((Tcl_HashTable *tablePtr, CONST char *key, int *newPtr)); +static unsigned int HashObj _ANSI_ARGS_((Tcl_Obj *objPtr)); +static Tcl_HashEntry * ObjFind _ANSI_ARGS_((Tcl_HashTable *tablePtr, + CONST char *key)); +static Tcl_HashEntry * ObjCreate _ANSI_ARGS_((Tcl_HashTable *tablePtr, + CONST char *key, int *newPtr)); /* *---------------------------------------------------------------------- @@ -97,7 +102,10 @@ Tcl_InitHashTable(tablePtr, keyType) tablePtr->downShift = 28; tablePtr->mask = 3; tablePtr->keyType = keyType; - if (keyType == TCL_STRING_KEYS) { + if (keyType == TCL_OBJ_KEYS) { + tablePtr->findProc = ObjFind; + tablePtr->createProc = ObjCreate; + } else if (keyType == TCL_STRING_KEYS) { tablePtr->findProc = StringFind; tablePtr->createProc = StringCreate; } else if (keyType == TCL_ONE_WORD_KEYS) { @@ -147,6 +155,10 @@ Tcl_DeleteHashEntry(entryPtr) } } } + + if (entryPtr->tablePtr->keyType == TCL_OBJ_KEYS) { + Tcl_DecrRefCount (entryPtr->key.objPtr); + } entryPtr->tablePtr->numEntries--; ckfree((char *) entryPtr); } @@ -183,6 +195,9 @@ Tcl_DeleteHashTable(tablePtr) hPtr = tablePtr->buckets[i]; while (hPtr != NULL) { nextPtr = hPtr->nextPtr; + if (tablePtr->keyType == TCL_OBJ_KEYS) { + Tcl_DecrRefCount (hPtr->key.objPtr); + } ckfree((char *) hPtr); hPtr = nextPtr; } @@ -844,6 +859,234 @@ BogusCreate(tablePtr, key, newPtr) /* *---------------------------------------------------------------------- * + * HashObj -- + * + * Compute a one-word summary of the string representation of the + * Tcl_Obj, which can be used to generate a hash index. + * + * Results: + * The return value is a one-word summary of the information in + * the string representation of the Tcl_Obj. + * + * Side effects: + * None. + * + *---------------------------------------------------------------------- + */ + +static unsigned int +HashObj(objPtr) + Tcl_Obj *objPtr; +{ + register CONST char *string; + register int length; + register unsigned int result; + register int c; + + string = Tcl_GetStringFromObj (objPtr, NULL); + length = objPtr->length; + + /* + * I tried a zillion different hash functions and asked many other + * people for advice. Many people had their own favorite functions, + * all different, but no-one had much idea why they were good ones. + * I chose the one below (multiply by 9 and add new character) + * because of the following reasons: + * + * 1. Multiplying by 10 is perfect for keys that are decimal strings, + * and multiplying by 9 is just about as good. + * 2. Times-9 is (shift-left-3) plus (old). This means that each + * character's bits hang around in the low-order bits of the + * hash value for ever, plus they spread fairly rapidly up to + * the high-order bits to fill out the hash value. This seems + * works well both for decimal and non-decimal strings. + */ + + result = 0; + while (length) { + c = *string; + string++; + length--; + if (length == 0) { + break; + } + result += (result<<3) + c; + } + return result; +} + +/* + *---------------------------------------------------------------------- + * + * ObjFind -- + * + * Given a hash table with string keys, and a string key, find + * the entry with a matching key. + * + * Results: + * The return value is a token for the matching entry in the + * hash table, or NULL if there was no matching entry. + * + * Side effects: + * None. + * + *---------------------------------------------------------------------- + */ + +static Tcl_HashEntry * +ObjFind(tablePtr, key) + Tcl_HashTable *tablePtr; /* Table in which to lookup entry. */ + CONST char *key; /* Key to use to find matching entry. */ +{ + Tcl_Obj *objPtr = (Tcl_Obj *) key; + register Tcl_HashEntry *hPtr; + register CONST char *p1, *p2; + register int l1, l2; + int index; + + index = HashObj(objPtr) & tablePtr->mask; + + /* + * Search all of the entries in the appropriate bucket. + */ + + for (hPtr = tablePtr->buckets[index]; hPtr != NULL; + hPtr = hPtr->nextPtr) { + /* + * If the object pointers are the same then they match. + */ + if (objPtr == hPtr->key.objPtr) { + return hPtr; + } + + p1 = Tcl_GetStringFromObj (objPtr, (int *) 0); + l1 = objPtr->length; + p2 = Tcl_GetStringFromObj (hPtr->key.objPtr, (int *) 0); + l2 = hPtr->key.objPtr->length; + + /* + * If the lengths are different then they do not match. + */ + if (l1 != l2) { + continue; + } + + for (;; p1++, p2++, l1--) { + if (*p1 != *p2) { + break; + } + if (l1 == 0) { + return hPtr; + } + } + } + return NULL; +} + +/* + *---------------------------------------------------------------------- + * + * ObjCreate -- + * + * Given a hash table with string keys, and a string key, find + * the entry with a matching key. If there is no matching entry, + * then create a new entry that does match. + * + * Results: + * The return value is a pointer to the matching entry. If this + * is a newly-created entry, then *newPtr will be set to a non-zero + * value; otherwise *newPtr will be set to 0. If this is a new + * entry the value stored in the entry will initially be 0. + * + * Side effects: + * A new entry may be added to the hash table. + * + *---------------------------------------------------------------------- + */ + +static Tcl_HashEntry * +ObjCreate(tablePtr, key, newPtr) + Tcl_HashTable *tablePtr; /* Table in which to lookup entry. */ + CONST char *key; /* Key to use to find or create matching + * entry. */ + int *newPtr; /* Store info here telling whether a new + * entry was created. */ +{ + Tcl_Obj *objPtr = (Tcl_Obj *) key; + register Tcl_HashEntry *hPtr; + register CONST char *p1, *p2; + register int l1, l2; + int index; + + index = HashObj(objPtr) & tablePtr->mask; + + /* + * Search all of the entries in this bucket. + */ + + for (hPtr = tablePtr->buckets[index]; hPtr != NULL; + hPtr = hPtr->nextPtr) { + /* + * If the object pointers are the same then they match. + */ + if (objPtr == hPtr->key.objPtr) { + *newPtr = 0; + return hPtr; + } + + p1 = Tcl_GetStringFromObj (objPtr, (int *) 0); + l1 = objPtr->length; + p2 = Tcl_GetStringFromObj (hPtr->key.objPtr, (int *) 0); + l2 = hPtr->key.objPtr->length; + + /* + * If the lengths are different then they do not match. + */ + if (l1 != l2) { + continue; + } + + for (;; p1++, p2++, l1--) { + if (*p1 != *p2) { + break; + } + if (l1 == 0) { + *newPtr = 0; + return hPtr; + } + } + } + + /* + * Entry not found. Add a new one to the bucket. + */ + + *newPtr = 1; + hPtr = (Tcl_HashEntry *) + Tcl_Alloc((unsigned) sizeof(Tcl_HashEntry)); + hPtr->tablePtr = tablePtr; + hPtr->bucketPtr = &(tablePtr->buckets[index]); + hPtr->nextPtr = *hPtr->bucketPtr; + hPtr->clientData = 0; + hPtr->key.objPtr = objPtr; + Tcl_IncrRefCount (objPtr); + *hPtr->bucketPtr = hPtr; + tablePtr->numEntries++; + + /* + * If the table has exceeded a decent size, rebuild it with many + * more buckets. + */ + + if (tablePtr->numEntries >= tablePtr->rebuildSize) { + RebuildTable(tablePtr); + } + return hPtr; +} + +/* + *---------------------------------------------------------------------- + * * RebuildTable -- * * This procedure is invoked when the ratio of entries to hash @@ -896,7 +1139,9 @@ RebuildTable(tablePtr) for (oldChainPtr = oldBuckets; oldSize > 0; oldSize--, oldChainPtr++) { for (hPtr = *oldChainPtr; hPtr != NULL; hPtr = *oldChainPtr) { *oldChainPtr = hPtr->nextPtr; - if (tablePtr->keyType == TCL_STRING_KEYS) { + if (tablePtr->keyType == TCL_OBJ_KEYS) { + index = HashObj(hPtr->key.objPtr) & tablePtr->mask; + } else if (tablePtr->keyType == TCL_STRING_KEYS) { index = HashString(hPtr->key.string) & tablePtr->mask; } else if (tablePtr->keyType == TCL_ONE_WORD_KEYS) { index = RANDOM_INDEX(tablePtr, hPtr->key.oneWordValue); -- cgit v0.12