summaryrefslogtreecommitdiffstats
path: root/generic
diff options
context:
space:
mode:
authordkf <donal.k.fellows@manchester.ac.uk>2010-02-07 09:10:32 (GMT)
committerdkf <donal.k.fellows@manchester.ac.uk>2010-02-07 09:10:32 (GMT)
commitbaafc87e3c7fe20ed29437a7e7b44f9f06f4e974 (patch)
treed772606db0a33eea164149ef48f66190338f8673 /generic
parent307233f5f049dea251e0d858e301565c0ed88117 (diff)
downloadtcl-baafc87e3c7fe20ed29437a7e7b44f9f06f4e974.zip
tcl-baafc87e3c7fe20ed29437a7e7b44f9f06f4e974.tar.gz
tcl-baafc87e3c7fe20ed29437a7e7b44f9f06f4e974.tar.bz2
Upgrade Tcl's hash function to use the FNV-32 algorithm. This is marginally
faster and gives a bit better distribution of keys (especially in large hash tables) but does change hash iteration order.
Diffstat (limited to 'generic')
-rw-r--r--generic/tclHash.c33
-rw-r--r--generic/tclObj.c35
-rw-r--r--generic/tclVar.c38
3 files changed, 30 insertions, 76 deletions
diff --git a/generic/tclHash.c b/generic/tclHash.c
index 9ed941e..f3c4b3a 100644
--- a/generic/tclHash.c
+++ b/generic/tclHash.c
@@ -10,7 +10,7 @@
* See the file "license.terms" for information on usage and redistribution of
* this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclHash.c,v 1.39 2009/07/16 21:24:39 dgp Exp $
+ * RCS: @(#) $Id: tclHash.c,v 1.40 2010/02/07 09:10:33 dkf Exp $
*/
#include "tclInt.h"
@@ -866,36 +866,29 @@ CompareStringKeys(
*----------------------------------------------------------------------
*/
-static unsigned int
+static unsigned
HashStringKey(
Tcl_HashTable *tablePtr, /* Hash table. */
void *keyPtr) /* Key from which to compute hash value. */
{
register const char *string = (const char *) keyPtr;
- register unsigned int result;
+ register unsigned result = 0;
register int c;
/*
- * I tried a zillion different hash functions and asked many other people
- * for advice. Many people had their own favorite functions, all
- * different, but no-one had much idea why they were good ones. I chose
- * the one below (multiply by 9 and add new character) because of the
- * following reasons:
+ * This is the (32-bit) Fowler/Noll/Vo hash algorithm. This has the
+ * property of being a reasonably good non-cryptographic hash function for
+ * short string words, i.e., virtually all command and namespace names. It
+ * is also faster than Tcl's original algorithm on Intel x86, where there
+ * is a fast built-in multiply assembly instruction.
*
- * 1. Multiplying by 10 is perfect for keys that are decimal strings, and
- * multiplying by 9 is just about as good.
- * 2. Times-9 is (shift-left-3) plus (old). This means that each
- * character's bits hang around in the low-order bits of the hash value
- * for ever, plus they spread fairly rapidly up to the high-order bits
- * to fill out the hash value. This seems works well both for decimal
- * and non-decimal strings, but isn't strong against maliciously-chosen
- * keys.
+ * Derived from Public Domain implementation by Landon Curt Noll at:
+ * http://www.isthe.com/chongo/src/fnv/hash_32.c
*/
- result = 0;
-
- for (c=*string++ ; c ; c=*string++) {
- result += (result<<3) + c;
+#define FNV_32_PRIME ((unsigned) 0x01000193)
+ while ((c=*string++)) {
+ result = (result * FNV_32_PRIME) ^ c;
}
return result;
}
diff --git a/generic/tclObj.c b/generic/tclObj.c
index 28d7a8a..6287b24 100644
--- a/generic/tclObj.c
+++ b/generic/tclObj.c
@@ -13,7 +13,7 @@
* See the file "license.terms" for information on usage and redistribution of
* this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclObj.c,v 1.166 2009/12/29 16:54:44 dkf Exp $
+ * RCS: @(#) $Id: tclObj.c,v 1.167 2010/02/07 09:10:33 dkf Exp $
*/
#include "tclInt.h"
@@ -4037,30 +4037,25 @@ TclHashObjKey(
Tcl_HashTable *tablePtr, /* Hash table. */
void *keyPtr) /* Key from which to compute hash value. */
{
- Tcl_Obj *objPtr = (Tcl_Obj *) keyPtr;
- const char *string = TclGetString(objPtr);
- int length = objPtr->length;
- unsigned int result = 0;
- int i;
+ Tcl_Obj *objPtr = keyPtr;
+ register unsigned result = 0;
+ const unsigned char *string = (unsigned char *) TclGetString(objPtr);
+ const unsigned char *last = string + objPtr->length;
/*
- * I tried a zillion different hash functions and asked many other people
- * for advice. Many people had their own favorite functions, all
- * different, but no-one had much idea why they were good ones. I chose
- * the one below (multiply by 9 and add new character) because of the
- * following reasons:
+ * This is the (32-bit) Fowler/Noll/Vo hash algorithm. This has the
+ * property of being a reasonably good non-cryptographic hash function for
+ * short string words, i.e., virtually all names used in practice. It is
+ * also faster than Tcl's original algorithm on Intel x86, where there is
+ * a fast built-in multiply assembly instruction.
*
- * 1. Multiplying by 10 is perfect for keys that are decimal strings, and
- * multiplying by 9 is just about as good.
- * 2. Times-9 is (shift-left-3) plus (old). This means that each
- * character's bits hang around in the low-order bits of the hash value
- * for ever, plus they spread fairly rapidly up to the high-order bits
- * to fill out the hash value. This seems works well both for decimal
- * and *non-decimal strings.
+ * Derived from Public Domain implementation by Landon Curt Noll at:
+ * http://www.isthe.com/chongo/src/fnv/hash_32.c
*/
- for (i=0 ; i<length ; i++) {
- result += (result << 3) + string[i];
+#define FNV_32_PRIME ((unsigned) 0x01000193)
+ while (string < last) {
+ result = (result * FNV_32_PRIME) ^ (*string++);
}
return result;
}
diff --git a/generic/tclVar.c b/generic/tclVar.c
index 79409b4..3f6130e 100644
--- a/generic/tclVar.c
+++ b/generic/tclVar.c
@@ -16,7 +16,7 @@
* See the file "license.terms" for information on usage and redistribution of
* this file, and for a DISCLAIMER OF ALL WARRANTIES.
*
- * RCS: @(#) $Id: tclVar.c,v 1.195 2010/02/05 14:33:09 dkf Exp $
+ * RCS: @(#) $Id: tclVar.c,v 1.196 2010/02/07 09:10:33 dkf Exp $
*/
#include "tclInt.h"
@@ -28,12 +28,11 @@
static Tcl_HashEntry * AllocVarEntry(Tcl_HashTable *tablePtr, void *keyPtr);
static void FreeVarEntry(Tcl_HashEntry *hPtr);
static int CompareVarKeys(void *keyPtr, Tcl_HashEntry *hPtr);
-static unsigned HashVarKey(Tcl_HashTable *tablePtr, void *keyPtr);
static const Tcl_HashKeyType tclVarHashKeyType = {
TCL_HASH_KEY_TYPE_VERSION, /* version */
0, /* flags */
- HashVarKey, /* hashKeyProc */
+ TclHashObjKey, /* hashKeyProc */
CompareVarKeys, /* compareKeysProc */
AllocVarEntry, /* allocEntryProc */
FreeVarEntry /* freeEntryProc */
@@ -6453,39 +6452,6 @@ CompareVarKeys(
return 0;
}
-
-static unsigned
-HashVarKey(
- Tcl_HashTable *tablePtr, /* Hash table. */
- void *keyPtr) /* Key from which to compute hash value. */
-{
- Tcl_Obj *objPtr = keyPtr;
- const char *string = TclGetString(objPtr);
- int length = objPtr->length;
- register unsigned result = 0;
- int i;
-
- /*
- * I tried a zillion different hash functions and asked many other people
- * for advice. Many people had their own favorite functions, all
- * different, but no-one had much idea why they were good ones. I chose
- * the one below (multiply by 9 and add new character) because of the
- * following reasons:
- *
- * 1. Multiplying by 10 is perfect for keys that are decimal strings, and
- * multiplying by 9 is just about as good.
- * 2. Times-9 is (shift-left-3) plus (old). This means that each
- * character's bits hang around in the low-order bits of the hash value
- * for ever, plus they spread fairly rapidly up to the high-order bits
- * to fill out the hash value. This seems works well both for decimal
- * and non-decimal strings.
- */
-
- for (i=0 ; i<length ; i++) {
- result += (result << 3) + string[i];
- }
- return result;
-}
/*
* Local Variables: