summaryrefslogtreecommitdiffstats
path: root/Tools
diff options
context:
space:
mode:
Diffstat (limited to 'Tools')
-rw-r--r--Tools/perfecthash/GenUCNHash.py109
-rw-r--r--Tools/perfecthash/perfect_hash.py619
-rw-r--r--Tools/perfecthash/perfhash.c114
3 files changed, 0 insertions, 842 deletions
diff --git a/Tools/perfecthash/GenUCNHash.py b/Tools/perfecthash/GenUCNHash.py
deleted file mode 100644
index ec69341..0000000
--- a/Tools/perfecthash/GenUCNHash.py
+++ /dev/null
@@ -1,109 +0,0 @@
-#! /usr/bin/env python
-import sys
-import string
-import perfect_hash
-
-# This is a user of perfect_hash.py
-# that takes as input the UnicodeData.txt file available from:
-# ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData.txt
-
-# It generates a hash table from Unicode Character Name ->
-# unicode code space value.
-
-# These variables determine which hash function is tried first.
-# Yields a multiple of 1.7875 for UnicodeData.txt on 2000/06/24/
-f1Seed = 0x64fc2234
-f2Seed = 0x8db7d737
-
-# Maximum allowed multipler, if this isn't None then instead of continually
-# increasing C, it resets it back to initC to keep searching for
-# a solution.
-minC = 1.7875
-# Initial multiplier for trying to find a perfect hash function.
-initC = 1.7875
-
-moduleName = "ucnhash"
-dataArrayName = "aucn"
-dataArrayType = "_Py_UnicodeCharacterName"
-headerFileName = "ucnhash.h"
-cFileName = "ucnhash.c"
-structName = "_Py_UCNHashAPI"
-
-keys = []
-hashData = {}
-
-def generateOutputFiles(perfHash, hashData):
- header = perfHash.generate_header(structName)
- header = header + """
-typedef struct
-{
- const char *pszUCN;
- Py_UCS4 value;
-} _Py_UnicodeCharacterName;
-
-"""
-
- code = perfHash.generate_code(moduleName,
- dataArrayName,
- dataArrayType,
- structName)
- out = open(headerFileName, "w")
- out.write(header)
- out = open(cFileName, "w")
- out.write("#include \"%s\"\n" % headerFileName)
- out.write(code)
- perfHash.generate_graph(out)
- out.write("""
-
-static const _Py_UnicodeCharacterName aucn[] =
-{
-""")
- for i in xrange(len(keys)):
- v = hashData[keys[i][0]]
- out.write(' { "' + keys[i][0] + '", ' + hex(v) + " }," + "\n")
- out.write("};\n\n")
- sys.stderr.write('\nGenerated output files: \n')
- sys.stderr.write('%s\n%s\n' % (headerFileName, cFileName))
-
-def main():
- # Suck in UnicodeData.txt and spit out the generated files.
- input = open(sys.argv[1], 'r')
- i = 0
- while 1:
- line = input.readline()
- if line == "": break
- fields = string.split(line, ';')
- if len(fields) < 2:
- sys.stderr.write('Ill-formated line!\n')
- sys.stderr.write('line #: %d\n' % (i + 1))
- sys.exit()
- data, key = fields[:2]
- key = string.strip( key )
- # Any name starting with '<' is a control, or start/end character,
- # so skip it...
- if key[0] == "<":
- continue
- hashcode = i
- i = i + 1
- # force the name to uppercase
- keys.append( (string.upper(key),hashcode) )
- data = string.atoi(data, 16)
- hashData[key] = data
-
- input.close()
- sys.stderr.write('%i key/hash pairs read\n' % len(keys) )
- perfHash = perfect_hash.generate_hash(keys, 1,
- minC, initC,
- f1Seed, f2Seed,
- # increment, tries
- 0.0025, 50)
- generateOutputFiles(perfHash, hashData)
-
-if __name__ == '__main__':
- if len(sys.argv) == 1:
- sys.stdout = sys.stderr
- print 'Usage: %s <input filename>' % sys.argv[0]
- print ' The input file needs to be UnicodeData.txt'
- sys.exit()
- main()
-
diff --git a/Tools/perfecthash/perfect_hash.py b/Tools/perfecthash/perfect_hash.py
deleted file mode 100644
index ccdf85b..0000000
--- a/Tools/perfecthash/perfect_hash.py
+++ /dev/null
@@ -1,619 +0,0 @@
-#!/usr/bin/env/python
-
-# perfect_hash.py
-#
-# Outputs C code for a minimal perfect hash.
-# The hash is produced using the algorithm described in
-# "Optimal algorithms for minimal perfect hashing",
-# G. Havas, B.S. Majewski. Available as a technical report
-# from the CS department, University of Queensland
-# (ftp://ftp.cs.uq.oz.au/).
-#
-# This is a modified version of Andrew Kuchling's code
-# (http://starship.python.net/crew/amk/python/code/perfect-hash.html)
-# and generates C fragments suitable for compilation as a Python
-# extension module.
-#
-
-# Difference between this algorithm and gperf:
-# Gperf will complete in finite time with a successful function,
-# or by giving up.
-# This algorithm may never complete, although it is extremely likely
-# when c >= 2.
-
-# The algorithm works like this:
-# 0) You have K keys, that you want to perfectly hash to a bunch
-# of hash values.
-#
-# 1) Choose a number N larger than K. This is the number of
-# vertices in a graph G, and also the size of the resulting table.
-#
-# 2) Pick two random hash functions f1, f2, that output values from
-# 0...N-1.
-#
-# 3) for key in keys:
-# h1 = f1(key) ; h2 = f2(key)
-# Draw an edge between vertices h1 and h2 of the graph.
-# Associate the desired hash value with that edge.
-#
-# 4) Check if G is acyclic; if not, go back to step 1 and pick a bigger N.
-#
-# 5) Assign values to each vertex such that, for each edge, you can
-# add the values for the two vertices and get the desired value
-# for that edge -- which is the desired hash key. This task is
-# dead easy, because the graph is acyclic. This is done by
-# picking a vertex V, and assigning it a value of 0. You then do a
-# depth-first search, assigning values to new vertices so that
-# they sum up properly.
-#
-# 6) f1, f2, and G now make up your perfect hash function.
-
-
-import sys, whrandom, string
-import pprint
-import perfhash
-import time
-
-class Hash:
- """Random hash function
- For simplicity and speed, this doesn't implement any byte-level hashing
- scheme. Instead, a random string is generated and prefixing to
- str(key), and then Python's hashing function is used."""
-
- def __init__(self, N, caseInsensitive=0):
- self.N = N
- junk = ""
- for i in range(10):
- junk = junk + whrandom.choice(string.letters + string.digits)
- self.junk = junk
- self.caseInsensitive = caseInsensitive
- self.seed = perfhash.calcSeed(junk)
-
- def __call__(self, key):
- key = str(key)
- if self.caseInsensitive:
- key = string.upper(key)
- x = perfhash.hash(self.seed, len(self.junk), key, self.N)
- return x
-
- def generate_code(self):
- s = """{
- register int len;
- register unsigned char *p;
- register unsigned long x;
-
- len = cch;
- p = (unsigned char *) key;
- x = %(junkSeed)s;
- while (--len >= 0)
- {
- /* (1000003 * x) ^ toupper(*(p++))
- * translated to handle > 32 bit longs
- */
- x = (0xf4243 * x);
- x = x & 0xFFFFFFFF;
- x = x ^ """ % \
- {
- "lenJunk" : len(self.junk),
- "junkSeed" : hex(self.seed),
- }
-
- if self.caseInsensitive:
- s = s + "toupper(*(p++));"
- else:
- s = s + "*(p++);"
- s = s + """
- }
- x ^= cch + %(lenJunk)d;
- if (x == 0xFFFFFFFF)
- x = 0xfffffffe;
- if (x & 0x80000000)
- {
- /* Emulate 32-bit signed (2's complement) modulo operation */
- x = (~x & 0xFFFFFFFF) + 1;
- x %%= k_cHashElements;
- if (x != 0)
- {
- x = x + (~k_cHashElements & 0xFFFFFFFF) + 1;
- x = (~x & 0xFFFFFFFF) + 1;
- }
- }
- else
- x %%= k_cHashElements;
- return x;
-}
-""" % { "lenJunk" : len(self.junk),
- "junkSeed" : hex(self.seed), }
- return s
-
-WHITE, GREY, BLACK = 0,1,2
-class Graph:
- """Graph class. This class isn't particularly efficient or general,
- and only has the features I needed to implement this algorithm.
-
- num_vertices -- number of vertices
- edges -- maps 2-tuples of vertex numbers to the value for this
- edge. If there's an edge between v1 and v2 (v1<v2),
- (v1,v2) is a key and the value is the edge's value.
- reachable_list -- maps a vertex V to the list of vertices
- to which V is connected by edges. Used
- for traversing the graph.
- values -- numeric value for each vertex
- """
-
- def __init__(self, num_vertices):
- self.num_vertices = num_vertices
- self.edges = {}
- self.reachable_list = {}
- self.values = [-1] * num_vertices
-
- def connect(self, vertex1, vertex2, value):
- """Connect 'vertex1' and 'vertex2' with an edge, with associated
- value 'value'"""
-
- if vertex1 > vertex2: vertex1, vertex2 = vertex2, vertex1
- if self.edges.has_key( (vertex1, vertex2) ):
- raise ValueError, 'Collision: vertices already connected'
- self.edges[ (vertex1, vertex2) ] = value
-
- # Add vertices to each other's reachable list
- if not self.reachable_list.has_key( vertex1 ):
- self.reachable_list[ vertex1 ] = [vertex2]
- else:
- self.reachable_list[vertex1].append(vertex2)
-
- if not self.reachable_list.has_key( vertex2 ):
- self.reachable_list[ vertex2 ] = [vertex1]
- else:
- self.reachable_list[vertex2].append(vertex1)
-
- def get_edge_value(self, vertex1, vertex2):
- """Retrieve the value corresponding to the edge between
- 'vertex1' and 'vertex2'. Raises KeyError if no such edge"""
- if vertex1 > vertex2:
- vertex1, vertex2 = vertex2, vertex1
- return self.edges[ (vertex1, vertex2) ]
-
- def is_acyclic(self):
- "Returns true if the graph is acyclic, otherwise false"
-
- # This is done by doing a depth-first search of the graph;
- # painting each vertex grey and then black. If the DFS
- # ever finds a vertex that isn't white, there's a cycle.
- colour = {}
- for i in range(self.num_vertices): colour[i] = WHITE
-
- # Loop over all vertices, taking white ones as starting
- # points for a traversal.
- for i in range(self.num_vertices):
- if colour[i] == WHITE:
-
- # List of vertices to visit
- visit_list = [ (None,i) ]
-
- # Do a DFS
- while visit_list:
- # Colour this vertex grey.
- parent, vertex = visit_list[0] ; del visit_list[0]
- colour[vertex] = GREY
-
- # Make copy of list of neighbours, removing the vertex
- # we arrived here from.
- neighbours = self.reachable_list.get(vertex, []) [:]
- if parent in neighbours: neighbours.remove( parent )
-
- for neighbour in neighbours:
- if colour[neighbour] == WHITE:
- visit_list.insert(0, (vertex, neighbour) )
- elif colour[neighbour] != WHITE:
- # Aha! Already visited this node,
- # so the graph isn't acyclic.
- return 0
-
- colour[vertex] = BLACK
-
- # We got through, so the graph is acyclic.
- return 1
-
- def assign_values(self):
- """Compute values for each vertex, so that they sum up
- properly to the associated value for each edge."""
-
- # Also done with a DFS; I simply copied the DFS code
- # from is_acyclic(). (Should generalize the logic so
- # one function could be used from both methods,
- # but I couldn't be bothered.)
-
- colour = {}
- for i in range(self.num_vertices): colour[i] = WHITE
-
- # Loop over all vertices, taking white ones as starting
- # points for a traversal.
- for i in range(self.num_vertices):
- if colour[i] == WHITE:
- # Set this vertex's value, arbitrarily, to zero.
- self.set_vertex_value( i, 0 )
-
- # List of vertices to visit
- visit_list = [ (None,i) ]
-
- # Do a DFS
- while visit_list:
- # Colour this vertex grey.
- parent, vertex = visit_list[0] ; del visit_list[0]
- colour[vertex] = GREY
-
- # Make copy of list of neighbours, removing the vertex
- # we arrived here from.
- neighbours = self.reachable_list.get(vertex, []) [:]
- if parent in neighbours: neighbours.remove( parent )
-
- for neighbour in self.reachable_list.get(vertex, []):
- edge_value = self.get_edge_value( vertex, neighbour )
- if colour[neighbour] == WHITE:
- visit_list.insert(0, (vertex, neighbour) )
-
- # Set new vertex's value to the desired
- # edge value, minus the value of the
- # vertex we came here from.
- new_val = (edge_value -
- self.get_vertex_value( vertex ) )
- self.set_vertex_value( neighbour,
- new_val % self.num_vertices)
-
- colour[vertex] = BLACK
-
- # Returns nothing
- return
-
- def __getitem__(self, index):
- if index < self.num_vertices: return index
- raise IndexError
-
- def get_vertex_value(self, vertex):
- "Get value for a vertex"
- return self.values[ vertex ]
-
- def set_vertex_value(self, vertex, value):
- "Set value for a vertex"
- self.values[ vertex ] = value
-
- def generate_code(self, out, width = 70):
- "Return nicely formatted table"
- out.write("{ ")
- pos = 0
- for v in self.values:
- v=str(v)+', '
- out.write(v)
- pos = pos + len(v) + 1
- if pos > width: out.write('\n '); pos = 0
- out.write('};\n')
-
-
-class PerfectHash:
- def __init__(self, cchMax, f1, f2, G, cHashElements, cKeys, maxHashValue):
- self.cchMax = cchMax
- self.f1 = f1
- self.f2 = f2
- self.G = G
- self.cHashElements = cHashElements
- self.cKeys = cKeys
- # determine the necessary type for storing our hash function
- # helper table:
- self.type = self.determineType(maxHashValue)
-
- def generate_header(self, structName):
- header = """
-#include "Python.h"
-#include <stdlib.h>
-
-/* --- C API ----------------------------------------------------*/
-/* C API for usage by other Python modules */
-typedef struct %(structName)s
-{
- unsigned long cKeys;
- unsigned long cchMax;
- unsigned long (*hash)(const char *key, unsigned int cch);
- const void *(*getValue)(unsigned long iKey);
-} %(structName)s;
-""" % { "structName" : structName }
- return header
-
- def determineType(self, maxHashValue):
- if maxHashValue <= 255:
- return "unsigned char"
- elif maxHashValue <= 65535:
- return "unsigned short"
- else:
- # Take the cheesy way out...
- return "unsigned long"
-
- def generate_code(self, moduleName, dataArrayName, dataArrayType, structName):
- # Output C code for the hash functions and tables
- code = """
-/*
- * The hash is produced using the algorithm described in
- * "Optimal algorithms for minimal perfect hashing",
- * G. Havas, B.S. Majewski. Available as a technical report
- * from the CS department, University of Queensland
- * (ftp://ftp.cs.uq.oz.au/).
- *
- * Generated using a heavily tweaked version of Andrew Kuchling's
- * perfect_hash.py:
- * http://starship.python.net/crew/amk/python/code/perfect-hash.html
- *
- * Generated on: %s
- */
-""" % time.ctime(time.time())
- # MSVC SP3 was complaining when I actually used a global constant
- code = code + """
-#define k_cHashElements %i
-#define k_cchMaxKey %d
-#define k_cKeys %i
-
-""" % (self.cHashElements, self.cchMax, self.cKeys)
-
- code = code + """
-staticforward const %s G[k_cHashElements];
-staticforward const %s %s[k_cKeys];
-""" % (self.type, dataArrayType, dataArrayName)
-
- code = code + """
-static long f1(const char *key, unsigned int cch)
-"""
- code = code + self.f1.generate_code()
- code = code + """
-
-static long f2(const char *key, unsigned int cch)
-"""
- code = code + self.f2.generate_code()
- code = code + """
-
-static unsigned long hash(const char *key, unsigned int cch)
-{
- return ((unsigned long)(G[ f1(key, cch) ]) + (unsigned long)(G[ f2(key, cch) ]) ) %% k_cHashElements;
-}
-
-const void *getValue(unsigned long iKey)
-{
- return &%(dataArrayName)s[iKey];
-}
-
-/* Helper for adding objects to dictionaries. Check for errors with
- PyErr_Occurred() */
-static
-void insobj(PyObject *dict,
- char *name,
- PyObject *v)
-{
- PyDict_SetItemString(dict, name, v);
- Py_XDECREF(v);
-}
-
-static const %(structName)s hashAPI =
-{
- k_cKeys,
- k_cchMaxKey,
- &hash,
- &getValue,
-};
-
-static
-PyMethodDef Module_methods[] =
-{
- {NULL, NULL},
-};
-
-static char *Module_docstring = "%(moduleName)s hash function module";
-
-/* Error reporting for module init functions */
-
-#define Py_ReportModuleInitError(modname) { \\
- PyObject *exc_type, *exc_value, *exc_tb; \\
- PyObject *str_type, *str_value; \\
- \\
- /* Fetch error objects and convert them to strings */ \\
- PyErr_Fetch(&exc_type, &exc_value, &exc_tb); \\
- if (exc_type && exc_value) { \\
- str_type = PyObject_Str(exc_type); \\
- str_value = PyObject_Str(exc_value); \\
- } \\
- else { \\
- str_type = NULL; \\
- str_value = NULL; \\
- } \\
- /* Try to format a more informative error message using the \\
- original error */ \\
- if (str_type && str_value && \\
- PyString_Check(str_type) && PyString_Check(str_value)) \\
- PyErr_Format( \\
- PyExc_ImportError, \\
- "initialization of module "modname" failed " \\
- "(%%s:%%s)", \\
- PyString_AS_STRING(str_type), \\
- PyString_AS_STRING(str_value)); \\
- else \\
- PyErr_SetString( \\
- PyExc_ImportError, \\
- "initialization of module "modname" failed"); \\
- Py_XDECREF(str_type); \\
- Py_XDECREF(str_value); \\
- Py_XDECREF(exc_type); \\
- Py_XDECREF(exc_value); \\
- Py_XDECREF(exc_tb); \\
-}
-
-
-/* Create PyMethodObjects and register them in the module\'s dict */
-DL_EXPORT(void)
-init%(moduleName)s(void)
-{
- PyObject *module, *moddict;
- /* Create module */
- module = Py_InitModule4("%(moduleName)s", /* Module name */
- Module_methods, /* Method list */
- Module_docstring, /* Module doc-string */
- (PyObject *)NULL, /* always pass this as *self */
- PYTHON_API_VERSION); /* API Version */
- if (module == NULL)
- goto onError;
- /* Add some constants to the module\'s dict */
- moddict = PyModule_GetDict(module);
- if (moddict == NULL)
- goto onError;
-
- /* Export C API */
- insobj(
- moddict,
- "%(moduleName)sAPI",
- PyCObject_FromVoidPtr((void *)&hashAPI, NULL));
-
-onError:
- /* Check for errors and report them */
- if (PyErr_Occurred())
- Py_ReportModuleInitError("%(moduleName)s");
- return;
-}
-""" % { "moduleName" : moduleName,
- "dataArrayName" : dataArrayName,
- "structName" : structName, }
-
- return code
-
- def generate_graph(self, out):
- out.write("""
-static const unsigned short G[] =
-""")
- self.G.generate_code(out)
-
-
-def generate_hash(keys, caseInsensitive=0,
- minC=None, initC=None,
- f1Seed=None, f2Seed=None,
- cIncrement=None, cTries=None):
- """Print out code for a perfect minimal hash. Input is a list of
- (key, desired hash value) tuples. """
-
- # K is the number of keys.
- K = len(keys)
-
- # We will be generating graphs of size N, where N = c * K.
- # The larger C is, the fewer trial graphs will need to be made, but
- # the resulting table is also larger. Increase this starting value
- # if you're impatient. After 50 failures, c will be increased by 0.025.
- if initC is None:
- initC = 1.5
-
- c = initC
- if cIncrement is None:
- cIncrement = 0.0025
-
- if cTries is None:
- cTries = 50
-
- # Number of trial graphs so far
- num_graphs = 0
- sys.stderr.write('Generating graphs... ')
-
- while 1:
- # N is the number of vertices in the graph G
- N = int(c*K)
- num_graphs = num_graphs + 1
- if (num_graphs % cTries) == 0:
- # Enough failures at this multiplier,
- # increase the multiplier and keep trying....
- c = c + cIncrement
-
- # Whats good with searching for a better
- # hash function if we exceed the size
- # of a function we've generated in the past....
- if minC is not None and \
- c > minC:
- c = initC
- sys.stderr.write(' -- c > minC, resetting c to %0.4f\n' % c)
- else:
- sys.stderr.write(' -- increasing c to %0.4f\n' % c)
- sys.stderr.write('Generating graphs... ')
-
- # Output a progress message
- sys.stderr.write( str(num_graphs) + ' ')
- sys.stderr.flush()
-
- # Create graph w/ N vertices
- G = Graph(N)
- # Save the seeds used to generate
- # the following two hash functions.
- _seeds = whrandom._inst._seed
-
- # Create 2 random hash functions
- f1 = Hash(N, caseInsensitive)
- f2 = Hash(N, caseInsensitive)
-
- # Set the initial hash function seed values if passed in.
- # Doing this protects our hash functions from
- # changes to whrandom's behavior.
- if f1Seed is not None:
- f1.seed = f1Seed
- f1Seed = None
- fSpecifiedSeeds = 1
- if f2Seed is not None:
- f2.seed = f2Seed
- f2Seed = None
- fSpecifiedSeeds = 1
-
- # Connect vertices given by the values of the two hash functions
- # for each key. Associate the desired hash value with each
- # edge.
- for k, v in keys:
- h1 = f1(k) ; h2 = f2(k)
- G.connect( h1, h2, v)
-
- # Check if the resulting graph is acyclic; if it is,
- # we're done with step 1.
- if G.is_acyclic():
- break
- elif fSpecifiedSeeds:
- sys.stderr.write('\nThe initial f1/f2 seeds you specified didn\'t generate a perfect hash function: \n')
- sys.stderr.write('f1 seed: %s\n' % f1.seed)
- sys.stderr.write('f2 seed: %s\n' % f2.seed)
- sys.stderr.write('multipler: %s\n' % c)
- sys.stderr.write('Your data has likely changed, or you forgot what your initial multiplier should be.\n')
- sys.stderr.write('continuing the search for a perfect hash function......\n')
- fSpecifiedSeeds = 0
-
- # Now we have an acyclic graph, so we assign values to each vertex
- # such that, for each edge, you can add the values for the two vertices
- # involved and get the desired value for that edge -- which is the
- # desired hash key. This task is dead easy, because the graph is acyclic.
- sys.stderr.write('\nAcyclic graph found; computing vertex values...\n')
- G.assign_values()
-
- sys.stderr.write('Checking uniqueness of hash values...\n')
-
- # Sanity check the result by actually verifying that all the keys
- # hash to the right value.
- cchMaxKey = 0
- maxHashValue = 0
-
- for k, v in keys:
- hash1 = G.values[ f1(k) ]
- hash2 = G.values[ f2(k) ]
- if hash1 > maxHashValue:
- maxHashValue = hash1
- if hash2 > maxHashValue:
- maxHashValue = hash2
- perfecthash = (hash1 + hash2) % N
- assert perfecthash == v
- cch = len(k)
- if cch > cchMaxKey:
- cchMaxKey = cch
-
- sys.stderr.write('Found perfect hash function!\n')
- sys.stderr.write('\nIn order to regenerate this hash function, \n')
- sys.stderr.write('you need to pass these following values back in:\n')
- sys.stderr.write('f1 seed: %s\n' % hex(f1.seed))
- sys.stderr.write('f2 seed: %s\n' % hex(f2.seed))
- sys.stderr.write('initial multipler: %s\n' % c)
-
- return PerfectHash(cchMaxKey, f1, f2, G, N, len(keys), maxHashValue)
-
diff --git a/Tools/perfecthash/perfhash.c b/Tools/perfecthash/perfhash.c
deleted file mode 100644
index 6e8a8b5..0000000
--- a/Tools/perfecthash/perfhash.c
+++ /dev/null
@@ -1,114 +0,0 @@
-#include <Python.h>
-
-static PyObject * hashFunction(PyObject *self, PyObject *args, PyObject *kw)
-{
- PyStringObject *a;
- register int len;
- register unsigned char *p;
- register unsigned long x;
- unsigned long ulSeed;
- unsigned long cchSeed;
- unsigned long cHashElements;
-
- if (!PyArg_ParseTuple(args, "llOl:hash",
- &ulSeed, &cchSeed, &a, &cHashElements))
- return NULL;
- if (!PyString_Check(a))
- {
- PyErr_SetString(PyExc_TypeError, "arg 3 needs to be a string");
- return NULL;
- }
-
- len = a->ob_size;
- p = (unsigned char *) a->ob_sval;
- x = ulSeed;
- while (--len >= 0)
- {
- /* (1000003 * x) ^ *p++
- * translated to handle > 32 bit longs
- */
- x = (0xf4243 * x);
- x = x & 0xFFFFFFFF;
- x = x ^ *p++;
- }
- x ^= a->ob_size + cchSeed;
- if (x == 0xFFFFFFFF)
- x = 0xfffffffe;
- if (x & 0x80000000)
- {
- /* Emulate Python 32-bit signed (2's complement)
- * modulo operation
- */
- x = (~x & 0xFFFFFFFF) + 1;
- x %= cHashElements;
- if (x != 0)
- {
- x = x + (~cHashElements & 0xFFFFFFFF) + 1;
- x = (~x & 0xFFFFFFFF) + 1;
- }
- }
- else
- x %= cHashElements;
- return PyInt_FromLong((long)x);
-}
-
-static PyObject * calcSeed(PyObject *self, PyObject *args, PyObject *kw)
-{
- PyStringObject *a;
- register int len;
- register unsigned char *p;
- register unsigned long x;
-
- if (!PyString_Check(args))
- {
- PyErr_SetString(PyExc_TypeError, "arg 1 expected a string, but didn't get it.");
- return NULL;
- }
-
- a = (PyStringObject *)args;
-
- len = a->ob_size;
- p = (unsigned char *) a->ob_sval;
- x = (*p << 7) & 0xFFFFFFFF;
- while (--len >= 0)
- {
- /* (1000003 * x) ^ *p++
- * translated to handle > 32 bit longs
- */
- x = (0xf4243 * x);
- x = x & 0xFFFFFFFF;
- x = x ^ *p++;
- }
- return PyInt_FromLong((long)x);
-}
-
-
-static struct PyMethodDef hashMethods[] = {
- { "calcSeed", calcSeed, 0, NULL },
- { "hash", hashFunction, 0, NULL },
- { NULL, NULL, 0, NULL } /* sentinel */
-};
-
-#ifdef _MSC_VER
-_declspec(dllexport)
-#endif
-void initperfhash(void)
-{
- PyObject *m;
-
- m = Py_InitModule4("perfhash", hashMethods,
- NULL, NULL, PYTHON_API_VERSION);
- if ( m == NULL )
- Py_FatalError("can't initialize module perfhash");
-}
-
-
-
-
-
-
-
-
-
-
-