diff options
Diffstat (limited to 'Tools')
-rw-r--r-- | Tools/perfecthash/GenUCNHash.py | 4 | ||||
-rw-r--r-- | Tools/perfecthash/perfect_hash.py | 58 | ||||
-rw-r--r-- | Tools/perfecthash/perfhash.c | 68 |
3 files changed, 93 insertions, 37 deletions
diff --git a/Tools/perfecthash/GenUCNHash.py b/Tools/perfecthash/GenUCNHash.py index 0701c25..ec69341 100644 --- a/Tools/perfecthash/GenUCNHash.py +++ b/Tools/perfecthash/GenUCNHash.py @@ -12,8 +12,8 @@ import perfect_hash # These variables determine which hash function is tried first. # Yields a multiple of 1.7875 for UnicodeData.txt on 2000/06/24/ -f1Seed = 1694245428 -f2Seed = -1917331657 +f1Seed = 0x64fc2234 +f2Seed = 0x8db7d737 # Maximum allowed multipler, if this isn't None then instead of continually # increasing C, it resets it back to initC to keep searching for diff --git a/Tools/perfecthash/perfect_hash.py b/Tools/perfecthash/perfect_hash.py index bf85739..ccdf85b 100644 --- a/Tools/perfecthash/perfect_hash.py +++ b/Tools/perfecthash/perfect_hash.py @@ -73,25 +73,29 @@ class Hash: key = str(key) if self.caseInsensitive: key = string.upper(key) - x = perfhash.hash(self.seed, len(self.junk), key) % self.N - #h = hash(self.junk + key) % self.N - #assert x == h + x = perfhash.hash(self.seed, len(self.junk), key, self.N) return x def generate_code(self): s = """{ register int len; register unsigned char *p; - register long x; + register unsigned long x; len = cch; p = (unsigned char *) key; - x = %(junkSeed)d; + x = %(junkSeed)s; while (--len >= 0) - x = (1000003*x) ^ """ % \ + { + /* (1000003 * x) ^ toupper(*(p++)) + * translated to handle > 32 bit longs + */ + x = (0xf4243 * x); + x = x & 0xFFFFFFFF; + x = x ^ """ % \ { "lenJunk" : len(self.junk), - "junkSeed" : self.seed, + "junkSeed" : hex(self.seed), } if self.caseInsensitive: @@ -99,20 +103,29 @@ class Hash: else: s = s + "*(p++);" s = s + """ + } x ^= cch + %(lenJunk)d; - if (x == -1) - x = -2; - x %%= k_cHashElements; - /* ensure the returned value is positive so we mimic Python's %% operator */ - if (x < 0) - x += k_cHashElements; + if (x == 0xFFFFFFFF) + x = 0xfffffffe; + if (x & 0x80000000) + { + /* Emulate 32-bit signed (2's complement) modulo operation */ + x = (~x & 0xFFFFFFFF) + 1; + x %%= k_cHashElements; + if (x != 0) + { + x = x + (~k_cHashElements & 0xFFFFFFFF) + 1; + x = (~x & 0xFFFFFFFF) + 1; + } + } + else + x %%= k_cHashElements; return x; } """ % { "lenJunk" : len(self.junk), - "junkSeed" : self.seed, } + "junkSeed" : hex(self.seed), } return s - WHITE, GREY, BLACK = 0,1,2 class Graph: """Graph class. This class isn't particularly efficient or general, @@ -139,8 +152,8 @@ class Graph: value 'value'""" if vertex1 > vertex2: vertex1, vertex2 = vertex2, vertex1 -# if self.edges.has_key( (vertex1, vertex2) ): -# raise ValueError, 'Collision: vertices already connected' + if self.edges.has_key( (vertex1, vertex2) ): + raise ValueError, 'Collision: vertices already connected' self.edges[ (vertex1, vertex2) ] = value # Add vertices to each other's reachable list @@ -341,8 +354,8 @@ typedef struct %(structName)s """ % (self.cHashElements, self.cchMax, self.cKeys) code = code + """ -static const %s G[k_cHashElements]; -static const %s %s[k_cKeys]; +staticforward const %s G[k_cHashElements]; +staticforward const %s %s[k_cKeys]; """ % (self.type, dataArrayType, dataArrayName) code = code + """ @@ -553,7 +566,7 @@ def generate_hash(keys, caseInsensitive=0, # edge. for k, v in keys: h1 = f1(k) ; h2 = f2(k) - G.connect( h1,h2, v) + G.connect( h1, h2, v) # Check if the resulting graph is acyclic; if it is, # we're done with step 1. @@ -598,8 +611,9 @@ def generate_hash(keys, caseInsensitive=0, sys.stderr.write('Found perfect hash function!\n') sys.stderr.write('\nIn order to regenerate this hash function, \n') sys.stderr.write('you need to pass these following values back in:\n') - sys.stderr.write('f1 seed: %s\n' % repr(f1.seed)) - sys.stderr.write('f2 seed: %s\n' % repr(f2.seed)) + sys.stderr.write('f1 seed: %s\n' % hex(f1.seed)) + sys.stderr.write('f2 seed: %s\n' % hex(f2.seed)) sys.stderr.write('initial multipler: %s\n' % c) return PerfectHash(cchMaxKey, f1, f2, G, N, len(keys), maxHashValue) + diff --git a/Tools/perfecthash/perfhash.c b/Tools/perfecthash/perfhash.c index 166b183..6e8a8b5 100644 --- a/Tools/perfecthash/perfhash.c +++ b/Tools/perfecthash/perfhash.c @@ -5,11 +5,13 @@ static PyObject * hashFunction(PyObject *self, PyObject *args, PyObject *kw) PyStringObject *a; register int len; register unsigned char *p; - register long x; - long lSeed; + register unsigned long x; + unsigned long ulSeed; unsigned long cchSeed; + unsigned long cHashElements; - if (!PyArg_ParseTuple(args, "iiO:hash", &lSeed, &cchSeed, &a)) + if (!PyArg_ParseTuple(args, "llOl:hash", + &ulSeed, &cchSeed, &a, &cHashElements)) return NULL; if (!PyString_Check(a)) { @@ -19,13 +21,35 @@ static PyObject * hashFunction(PyObject *self, PyObject *args, PyObject *kw) len = a->ob_size; p = (unsigned char *) a->ob_sval; - x = lSeed; + x = ulSeed; while (--len >= 0) - x = (1000003*x) ^ *p++; + { + /* (1000003 * x) ^ *p++ + * translated to handle > 32 bit longs + */ + x = (0xf4243 * x); + x = x & 0xFFFFFFFF; + x = x ^ *p++; + } x ^= a->ob_size + cchSeed; - if (x == -1) - x = -2; - return PyInt_FromLong(x); + if (x == 0xFFFFFFFF) + x = 0xfffffffe; + if (x & 0x80000000) + { + /* Emulate Python 32-bit signed (2's complement) + * modulo operation + */ + x = (~x & 0xFFFFFFFF) + 1; + x %= cHashElements; + if (x != 0) + { + x = x + (~cHashElements & 0xFFFFFFFF) + 1; + x = (~x & 0xFFFFFFFF) + 1; + } + } + else + x %= cHashElements; + return PyInt_FromLong((long)x); } static PyObject * calcSeed(PyObject *self, PyObject *args, PyObject *kw) @@ -33,7 +57,7 @@ static PyObject * calcSeed(PyObject *self, PyObject *args, PyObject *kw) PyStringObject *a; register int len; register unsigned char *p; - register long x; + register unsigned long x; if (!PyString_Check(args)) { @@ -45,10 +69,17 @@ static PyObject * calcSeed(PyObject *self, PyObject *args, PyObject *kw) len = a->ob_size; p = (unsigned char *) a->ob_sval; - x = *p << 7; + x = (*p << 7) & 0xFFFFFFFF; while (--len >= 0) - x = (1000003*x) ^ *p++; - return PyInt_FromLong(x); + { + /* (1000003 * x) ^ *p++ + * translated to handle > 32 bit longs + */ + x = (0xf4243 * x); + x = x & 0xFFFFFFFF; + x = x ^ *p++; + } + return PyInt_FromLong((long)x); } @@ -68,5 +99,16 @@ void initperfhash(void) m = Py_InitModule4("perfhash", hashMethods, NULL, NULL, PYTHON_API_VERSION); if ( m == NULL ) - Py_FatalError("can't initialize module hashModule"); + Py_FatalError("can't initialize module perfhash"); } + + + + + + + + + + + |