Diffstat (limited to 'Tools/unicode/makeunicodedata.py')
-rw-r--r-- | Tools/unicode/makeunicodedata.py | 229
1 file changed, 114 insertions, 115 deletions
diff --git a/Tools/unicode/makeunicodedata.py b/Tools/unicode/makeunicodedata.py
index 673f713..0aabdf7 100644
--- a/Tools/unicode/makeunicodedata.py
+++ b/Tools/unicode/makeunicodedata.py
@@ -60,21 +60,21 @@ UPPER_MASK = 0x80
 
 def maketables(trace=0):
 
-    print "--- Reading", UNICODE_DATA % "", "..."
+    print("--- Reading", UNICODE_DATA % "", "...")
 
     version = ""
     unicode = UnicodeData(UNICODE_DATA % version,
                           COMPOSITION_EXCLUSIONS % version,
                           EASTASIAN_WIDTH % version)
 
-    print len(filter(None, unicode.table)), "characters"
+    print(len(filter(None, unicode.table)), "characters")
 
     for version in old_versions:
-        print "--- Reading", UNICODE_DATA % ("-"+version), "..."
+        print("--- Reading", UNICODE_DATA % ("-"+version), "...")
         old_unicode = UnicodeData(UNICODE_DATA % ("-"+version),
                                   COMPOSITION_EXCLUSIONS % ("-"+version),
                                   EASTASIAN_WIDTH % ("-"+version))
-        print len(filter(None, old_unicode.table)), "characters"
+        print(len(filter(None, old_unicode.table)), "characters")
         merge_old_version(version, unicode, old_unicode)
 
     makeunicodename(unicode, trace)
@@ -93,7 +93,7 @@ def makeunicodedata(unicode, trace):
 
     FILE = "Modules/unicodedata_db.h"
 
-    print "--- Preparing", FILE, "..."
+    print("--- Preparing", FILE, "...")
 
     # 1) database properties
 
@@ -203,93 +203,92 @@ def makeunicodedata(unicode, trace):
                 l = comp_last[l]
                 comp_data[f*total_last+l] = char
 
-    print len(table), "unique properties"
-    print len(decomp_prefix), "unique decomposition prefixes"
-    print len(decomp_data), "unique decomposition entries:",
-    print decomp_size, "bytes"
-    print total_first, "first characters in NFC"
-    print total_last, "last characters in NFC"
-    print len(comp_pairs), "NFC pairs"
+    print(len(table), "unique properties")
+    print(len(decomp_prefix), "unique decomposition prefixes")
+    print(len(decomp_data), "unique decomposition entries:", end=' ')
+    print(decomp_size, "bytes")
+    print(total_first, "first characters in NFC")
+    print(total_last, "last characters in NFC")
+    print(len(comp_pairs), "NFC pairs")
 
-    print "--- Writing", FILE, "..."
+    print("--- Writing", FILE, "...")
 
     fp = open(FILE, "w")
 
-    print >>fp, "/* this file was generated by %s %s */" % (SCRIPT, VERSION)
-    print >>fp
-    print >>fp, '#define UNIDATA_VERSION "%s"' % UNIDATA_VERSION
-    print >>fp, "/* a list of unique database records */"
-    print >>fp, \
-          "const _PyUnicode_DatabaseRecord _PyUnicode_Database_Records[] = {"
+    print("/* this file was generated by %s %s */" % (SCRIPT, VERSION), file=fp)
+    print(file=fp)
+    print('#define UNIDATA_VERSION "%s"' % UNIDATA_VERSION, file=fp)
+    print("/* a list of unique database records */", file=fp)
+    print("const _PyUnicode_DatabaseRecord _PyUnicode_Database_Records[] = {", file=fp)
     for item in table:
-        print >>fp, "    {%d, %d, %d, %d, %d}," % item
-    print >>fp, "};"
-    print >>fp
-
-    print >>fp, "/* Reindexing of NFC first characters. */"
-    print >>fp, "#define TOTAL_FIRST",total_first
-    print >>fp, "#define TOTAL_LAST",total_last
-    print >>fp, "struct reindex{int start;short count,index;};"
-    print >>fp, "struct reindex nfc_first[] = {"
+        print("    {%d, %d, %d, %d, %d}," % item, file=fp)
+    print("};", file=fp)
+    print(file=fp)
+
+    print("/* Reindexing of NFC first characters. */", file=fp)
+    print("#define TOTAL_FIRST",total_first, file=fp)
+    print("#define TOTAL_LAST",total_last, file=fp)
+    print("struct reindex{int start;short count,index;};", file=fp)
+    print("struct reindex nfc_first[] = {", file=fp)
     for start,end in comp_first_ranges:
-        print >>fp,"  { %d, %d, %d}," % (start,end-start,comp_first[start])
-    print >>fp,"  {0,0,0}"
-    print >>fp,"};\n"
-    print >>fp, "struct reindex nfc_last[] = {"
+        print("  { %d, %d, %d}," % (start,end-start,comp_first[start]), file=fp)
+    print("  {0,0,0}", file=fp)
+    print("};\n", file=fp)
+    print("struct reindex nfc_last[] = {", file=fp)
     for start,end in comp_last_ranges:
-        print >>fp,"  { %d, %d, %d}," % (start,end-start,comp_last[start])
-    print >>fp,"  {0,0,0}"
-    print >>fp,"};\n"
+        print("  { %d, %d, %d}," % (start,end-start,comp_last[start]), file=fp)
+    print("  {0,0,0}", file=fp)
+    print("};\n", file=fp)
 
     # FIXME: <fl> the following tables could be made static, and
     # the support code moved into unicodedatabase.c
 
-    print >>fp, "/* string literals */"
-    print >>fp, "const char *_PyUnicode_CategoryNames[] = {"
+    print("/* string literals */", file=fp)
+    print("const char *_PyUnicode_CategoryNames[] = {", file=fp)
     for name in CATEGORY_NAMES:
-        print >>fp, "    \"%s\"," % name
-    print >>fp, "    NULL"
-    print >>fp, "};"
+        print("    \"%s\"," % name, file=fp)
+    print("    NULL", file=fp)
+    print("};", file=fp)
 
-    print >>fp, "const char *_PyUnicode_BidirectionalNames[] = {"
+    print("const char *_PyUnicode_BidirectionalNames[] = {", file=fp)
     for name in BIDIRECTIONAL_NAMES:
-        print >>fp, "    \"%s\"," % name
-    print >>fp, "    NULL"
-    print >>fp, "};"
+        print("    \"%s\"," % name, file=fp)
+    print("    NULL", file=fp)
+    print("};", file=fp)
 
-    print >>fp, "const char *_PyUnicode_EastAsianWidthNames[] = {"
+    print("const char *_PyUnicode_EastAsianWidthNames[] = {", file=fp)
     for name in EASTASIANWIDTH_NAMES:
-        print >>fp, "    \"%s\"," % name
-    print >>fp, "    NULL"
-    print >>fp, "};"
+        print("    \"%s\"," % name, file=fp)
+    print("    NULL", file=fp)
+    print("};", file=fp)
 
-    print >>fp, "static const char *decomp_prefix[] = {"
+    print("static const char *decomp_prefix[] = {", file=fp)
     for name in decomp_prefix:
-        print >>fp, "    \"%s\"," % name
-    print >>fp, "    NULL"
-    print >>fp, "};"
+        print("    \"%s\"," % name, file=fp)
+    print("    NULL", file=fp)
+    print("};", file=fp)
 
     # split record index table
     index1, index2, shift = splitbins(index, trace)
 
-    print >>fp, "/* index tables for the database records */"
-    print >>fp, "#define SHIFT", shift
+    print("/* index tables for the database records */", file=fp)
+    print("#define SHIFT", shift, file=fp)
     Array("index1", index1).dump(fp, trace)
     Array("index2", index2).dump(fp, trace)
 
     # split decomposition index table
     index1, index2, shift = splitbins(decomp_index, trace)
 
-    print >>fp, "/* decomposition data */"
+    print("/* decomposition data */", file=fp)
     Array("decomp_data", decomp_data).dump(fp, trace)
 
-    print >>fp, "/* index tables for the decomposition data */"
-    print >>fp, "#define DECOMP_SHIFT", shift
+    print("/* index tables for the decomposition data */", file=fp)
+    print("#define DECOMP_SHIFT", shift, file=fp)
     Array("decomp_index1", index1).dump(fp, trace)
     Array("decomp_index2", index2).dump(fp, trace)
 
     index, index2, shift = splitbins(comp_data, trace)
-    print >>fp, "/* NFC pairs */"
-    print >>fp, "#define COMP_SHIFT", shift
+    print("/* NFC pairs */", file=fp)
+    print("#define COMP_SHIFT", shift, file=fp)
     Array("comp_index", index).dump(fp, trace)
     Array("comp_data", index2).dump(fp, trace)
 
@@ -306,30 +305,30 @@ def makeunicodedata(unicode, trace):
             index[i] = cache[record] = len(records)
             records.append(record)
     index1, index2, shift = splitbins(index, trace)
-    print >>fp, "static const change_record change_records_%s[] = {" % cversion
+    print("static const change_record change_records_%s[] = {" % cversion, file=fp)
     for record in records:
-        print >>fp, "\t{ %s }," % ", ".join(map(str,record))
-    print >>fp, "};"
+        print("\t{ %s }," % ", ".join(map(str,record)), file=fp)
+    print("};", file=fp)
     Array("changes_%s_index" % cversion, index1).dump(fp, trace)
     Array("changes_%s_data" % cversion, index2).dump(fp, trace)
-    print >>fp, "static const change_record* get_change_%s(Py_UCS4 n)" % cversion
-    print >>fp, "{"
-    print >>fp, "\tint index;"
-    print >>fp, "\tif (n >= 0x110000) index = 0;"
-    print >>fp, "\telse {"
-    print >>fp, "\t\tindex = changes_%s_index[n>>%d];" % (cversion, shift)
-    print >>fp, "\t\tindex = changes_%s_data[(index<<%d)+(n & %d)];" % \
-        (cversion, shift, ((1<<shift)-1))
-    print >>fp, "\t}"
-    print >>fp, "\treturn change_records_%s+index;" % cversion
-    print >>fp, "}\n"
-    print >>fp, "static Py_UCS4 normalization_%s(Py_UCS4 n)" % cversion
-    print >>fp, "{"
-    print >>fp, "\tswitch(n) {"
+    print("static const change_record* get_change_%s(Py_UCS4 n)" % cversion, file=fp)
+    print("{", file=fp)
+    print("\tint index;", file=fp)
+    print("\tif (n >= 0x110000) index = 0;", file=fp)
+    print("\telse {", file=fp)
+    print("\t\tindex = changes_%s_index[n>>%d];" % (cversion, shift), file=fp)
+    print("\t\tindex = changes_%s_data[(index<<%d)+(n & %d)];" % \
+        (cversion, shift, ((1<<shift)-1)), file=fp)
+    print("\t}", file=fp)
+    print("\treturn change_records_%s+index;" % cversion, file=fp)
+    print("}\n", file=fp)
+    print("static Py_UCS4 normalization_%s(Py_UCS4 n)" % cversion, file=fp)
+    print("{", file=fp)
+    print("\tswitch(n) {", file=fp)
     for k, v in normalization:
-        print >>fp, "\tcase %s: return 0x%s;" % (hex(k), v)
-    print >>fp, "\tdefault: return 0;"
-    print >>fp, "\t}\n}\n"
+        print("\tcase %s: return 0x%s;" % (hex(k), v), file=fp)
+    print("\tdefault: return 0;", file=fp)
+    print("\t}\n}\n", file=fp)
 
     fp.close()
 
@@ -340,7 +339,7 @@ def makeunicodetype(unicode, trace):
 
     FILE = "Objects/unicodetype_db.h"
 
-    print "--- Preparing", FILE, "..."
+    print("--- Preparing", FILE, "...")
 
     # extract unicode types
     dummy = (0, 0, 0, 0, 0, 0)
@@ -405,25 +404,25 @@ def makeunicodetype(unicode, trace):
             table.append(item)
         index[char] = i
 
-    print len(table), "unique character type entries"
+    print(len(table), "unique character type entries")
 
-    print "--- Writing", FILE, "..."
+    print("--- Writing", FILE, "...")
 
    fp = open(FILE, "w")
 
-    print >>fp, "/* this file was generated by %s %s */" % (SCRIPT, VERSION)
-    print >>fp
-    print >>fp, "/* a list of unique character type descriptors */"
-    print >>fp, "const _PyUnicode_TypeRecord _PyUnicode_TypeRecords[] = {"
+    print("/* this file was generated by %s %s */" % (SCRIPT, VERSION), file=fp)
+    print(file=fp)
+    print("/* a list of unique character type descriptors */", file=fp)
+    print("const _PyUnicode_TypeRecord _PyUnicode_TypeRecords[] = {", file=fp)
     for item in table:
-        print >>fp, "    {%d, %d, %d, %d, %d, %d}," % item
-    print >>fp, "};"
-    print >>fp
+        print("    {%d, %d, %d, %d, %d, %d}," % item, file=fp)
+    print("};", file=fp)
+    print(file=fp)
 
     # split decomposition index table
     index1, index2, shift = splitbins(index, trace)
 
-    print >>fp, "/* type indexes */"
-    print >>fp, "#define SHIFT", shift
+    print("/* type indexes */", file=fp)
+    print("#define SHIFT", shift, file=fp)
     Array("index1", index1).dump(fp, trace)
     Array("index2", index2).dump(fp, trace)
 
@@ -436,7 +435,7 @@ def makeunicodename(unicode, trace):
 
     FILE = "Modules/unicodename_db.h"
 
-    print "--- Preparing", FILE, "..."
+    print("--- Preparing", FILE, "...")
 
     # collect names
     names = [None] * len(unicode.chars)
@@ -448,7 +447,7 @@ def makeunicodename(unicode, trace):
         if name and name[0] != "<":
             names[char] = name + chr(0)
 
-    print len(filter(lambda n: n is not None, names)), "distinct names"
+    print(len(filter(lambda n: n is not None, names)), "distinct names")
 
     # collect unique words from names (note that we differ between
     # words inside a sentence, and words ending a sentence.  the
@@ -469,7 +468,7 @@ def makeunicodename(unicode, trace):
             else:
                 words[w] = [len(words)]
 
-    print n, "words in text;", b, "bytes"
+    print(n, "words in text;", b, "bytes")
 
     wordlist = words.items()
 
@@ -485,19 +484,19 @@ def makeunicodename(unicode, trace):
     escapes = 0
     while escapes * 256 < len(wordlist):
        escapes = escapes + 1
-    print escapes, "escapes"
+    print(escapes, "escapes")
 
     short = 256 - escapes
 
     assert short > 0
 
-    print short, "short indexes in lexicon"
+    print(short, "short indexes in lexicon")
 
     # statistics
     n = 0
     for i in range(short):
        n = n + len(wordlist[i][1])
 
-    print n, "short indexes in phrasebook"
+    print(n, "short indexes in phrasebook")
 
     # pick the most commonly used words, and sort the rest on falling
     # length (to maximize overlap)
@@ -566,29 +565,29 @@ def makeunicodename(unicode, trace):
 
     codehash = Hash("code", data, 47)
 
-    print "--- Writing", FILE, "..."
+    print("--- Writing", FILE, "...")
 
     fp = open(FILE, "w")
 
-    print >>fp, "/* this file was generated by %s %s */" % (SCRIPT, VERSION)
-    print >>fp
-    print >>fp, "#define NAME_MAXLEN", 256
-    print >>fp
-    print >>fp, "/* lexicon */"
+    print("/* this file was generated by %s %s */" % (SCRIPT, VERSION), file=fp)
+    print(file=fp)
+    print("#define NAME_MAXLEN", 256, file=fp)
+    print(file=fp)
+    print("/* lexicon */", file=fp)
     Array("lexicon", lexicon).dump(fp, trace)
     Array("lexicon_offset", lexicon_offset).dump(fp, trace)
 
     # split decomposition index table
     offset1, offset2, shift = splitbins(phrasebook_offset, trace)
 
-    print >>fp, "/* code->name phrasebook */"
-    print >>fp, "#define phrasebook_shift", shift
-    print >>fp, "#define phrasebook_short", short
+    print("/* code->name phrasebook */", file=fp)
+    print("#define phrasebook_shift", shift, file=fp)
+    print("#define phrasebook_short", short, file=fp)
     Array("phrasebook", phrasebook).dump(fp, trace)
     Array("phrasebook_offset1", offset1).dump(fp, trace)
     Array("phrasebook_offset2", offset2).dump(fp, trace)
 
-    print >>fp, "/* name->code dictionary */"
+    print("/* name->code dictionary */", file=fp)
     codehash.dump(fp, trace)
 
     fp.close()
 
@@ -781,7 +780,7 @@ class Hash:
             else:
                 raise AssertionError, "ran out of polynominals"
 
-        print size, "slots in hash table"
+        print(size, "slots in hash table")
 
        table = [None] * size
 
@@ -813,7 +812,7 @@ class Hash:
                 if incr > mask:
                     incr = incr ^ poly
 
-        print n, "collisions"
+        print(n, "collisions")
         self.collisions = n
 
         for i in range(len(table)):
@@ -845,7 +844,7 @@ class Array:
         # write data to file, as a C array
         size = getsize(self.data)
         if trace:
-            print >>sys.stderr, self.name+":", size*len(self.data), "bytes"
+            print(self.name+":", size*len(self.data), "bytes", file=sys.stderr)
         file.write("static ")
         if size == 1:
             file.write("unsigned char")
@@ -895,10 +894,10 @@ def splitbins(t, trace=0):
     import sys
     if trace:
         def dump(t1, t2, shift, bytes):
-            print >>sys.stderr, "%d+%d bins at shift %d; %d bytes" % (
-                len(t1), len(t2), shift, bytes)
-        print >>sys.stderr, "Size of original table:", len(t)*getsize(t), \
-                            "bytes"
+            print("%d+%d bins at shift %d; %d bytes" % (
+                len(t1), len(t2), shift, bytes), file=sys.stderr)
+        print("Size of original table:", len(t)*getsize(t), \
+              "bytes", file=sys.stderr)
     n = len(t)-1    # last valid index
     maxshift = 0    # the most we can shift n and still have something left
     if n > 0:
@@ -930,7 +929,7 @@ def splitbins(t, trace=0):
             bytes = b
     t1, t2, shift = best
     if trace:
-        print >>sys.stderr, "Best:",
+        print("Best:", end=' ', file=sys.stderr)
         dump(t1, t2, shift, bytes)
     if __debug__:
         # exhaustively verify that the decomposition is correct
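Every hunk in this patch applies one of three mechanical rewrites: a bare print statement becomes a print() call, the Python 2 redirection syntax print >>fp becomes the file=fp keyword argument, and a trailing comma (which in Python 2 suppressed the newline and emitted a soft space) becomes end=' '. The following minimal sketch, which is not part of the patch, demonstrates all three patterns under Python 3; the file name "demo.h" and the sample values are made up for illustration.

# A minimal sketch (not part of the patch) of the three rewrite patterns;
# the file name "demo.h" and the sample values are illustrative only.
import sys

# Pattern 1: print statement -> print() function call.
print("--- Preparing", "demo.h", "...")

with open("demo.h", "w") as fp:
    # Pattern 2: print >>fp, x  ->  print(x, file=fp);
    # a bare print >>fp (emit a blank line) becomes print(file=fp).
    print("/* generated header */", file=fp)
    print(file=fp)

# Pattern 3: a trailing comma (suppress the newline, emit a space)
# becomes end=' ', so these two calls still produce one output line.
print(3, "unique decomposition entries:", end=' ')
print(128, "bytes")

# The keyword arguments combine, as in the splitbins() trace output.
print("Best:", end=' ', file=sys.stderr)
print("8+16 bins at shift 7", file=sys.stderr)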