diff options
author | Andrew M. Kuchling <amk@amk.ca> | 2000-07-29 13:24:39 (GMT) |
---|---|---|
committer | Andrew M. Kuchling <amk@amk.ca> | 2000-07-29 13:24:39 (GMT) |
commit | 7e11170e8531c17f3c74cbd2d243062cf3dd04bd (patch) | |
tree | 074c2f20bd6aa5fb1b04702674eb2379298a89ee /Tools/perfecthash/GenUCNHash.py | |
parent | 7a4409c1b2e16fa2a4a6dbc93d67746dbbab4b5c (diff) | |
download | cpython-7e11170e8531c17f3c74cbd2d243062cf3dd04bd.zip cpython-7e11170e8531c17f3c74cbd2d243062cf3dd04bd.tar.gz cpython-7e11170e8531c17f3c74cbd2d243062cf3dd04bd.tar.bz2 |
Removed Tools/perfecthash, per python-dev discussion
Diffstat (limited to 'Tools/perfecthash/GenUCNHash.py')
-rw-r--r-- | Tools/perfecthash/GenUCNHash.py | 109 |
1 files changed, 0 insertions, 109 deletions
diff --git a/Tools/perfecthash/GenUCNHash.py b/Tools/perfecthash/GenUCNHash.py deleted file mode 100644 index ec69341..0000000 --- a/Tools/perfecthash/GenUCNHash.py +++ /dev/null @@ -1,109 +0,0 @@ -#! /usr/bin/env python -import sys -import string -import perfect_hash - -# This is a user of perfect_hash.py -# that takes as input the UnicodeData.txt file available from: -# ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData.txt - -# It generates a hash table from Unicode Character Name -> -# unicode code space value. - -# These variables determine which hash function is tried first. -# Yields a multiple of 1.7875 for UnicodeData.txt on 2000/06/24/ -f1Seed = 0x64fc2234 -f2Seed = 0x8db7d737 - -# Maximum allowed multipler, if this isn't None then instead of continually -# increasing C, it resets it back to initC to keep searching for -# a solution. -minC = 1.7875 -# Initial multiplier for trying to find a perfect hash function. -initC = 1.7875 - -moduleName = "ucnhash" -dataArrayName = "aucn" -dataArrayType = "_Py_UnicodeCharacterName" -headerFileName = "ucnhash.h" -cFileName = "ucnhash.c" -structName = "_Py_UCNHashAPI" - -keys = [] -hashData = {} - -def generateOutputFiles(perfHash, hashData): - header = perfHash.generate_header(structName) - header = header + """ -typedef struct -{ - const char *pszUCN; - Py_UCS4 value; -} _Py_UnicodeCharacterName; - -""" - - code = perfHash.generate_code(moduleName, - dataArrayName, - dataArrayType, - structName) - out = open(headerFileName, "w") - out.write(header) - out = open(cFileName, "w") - out.write("#include \"%s\"\n" % headerFileName) - out.write(code) - perfHash.generate_graph(out) - out.write(""" - -static const _Py_UnicodeCharacterName aucn[] = -{ -""") - for i in xrange(len(keys)): - v = hashData[keys[i][0]] - out.write(' { "' + keys[i][0] + '", ' + hex(v) + " }," + "\n") - out.write("};\n\n") - sys.stderr.write('\nGenerated output files: \n') - sys.stderr.write('%s\n%s\n' % (headerFileName, cFileName)) - -def main(): - # Suck in UnicodeData.txt and spit out the generated files. - input = open(sys.argv[1], 'r') - i = 0 - while 1: - line = input.readline() - if line == "": break - fields = string.split(line, ';') - if len(fields) < 2: - sys.stderr.write('Ill-formated line!\n') - sys.stderr.write('line #: %d\n' % (i + 1)) - sys.exit() - data, key = fields[:2] - key = string.strip( key ) - # Any name starting with '<' is a control, or start/end character, - # so skip it... - if key[0] == "<": - continue - hashcode = i - i = i + 1 - # force the name to uppercase - keys.append( (string.upper(key),hashcode) ) - data = string.atoi(data, 16) - hashData[key] = data - - input.close() - sys.stderr.write('%i key/hash pairs read\n' % len(keys) ) - perfHash = perfect_hash.generate_hash(keys, 1, - minC, initC, - f1Seed, f2Seed, - # increment, tries - 0.0025, 50) - generateOutputFiles(perfHash, hashData) - -if __name__ == '__main__': - if len(sys.argv) == 1: - sys.stdout = sys.stderr - print 'Usage: %s <input filename>' % sys.argv[0] - print ' The input file needs to be UnicodeData.txt' - sys.exit() - main() - |