Diffstat (limited to 'Tools/unicode/makeunicodedata.py')
-rw-r--r--  Tools/unicode/makeunicodedata.py  229
1 file changed, 114 insertions, 115 deletions
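
The change below is a mechanical port of this generator script from Python 2 print statements to the Python 3 print() function. A minimal runnable sketch of the rewrite patterns the diff applies (fp and the printed values are illustrative here; the script itself writes to generated C headers such as Modules/unicodedata_db.h):

    import sys
    fp = sys.stdout  # stand-in for the open header file

    print("--- Reading", "UnicodeData.txt", "...")  # print x, y     ->  print(x, y)
    print(3400, "entries:", end=' ')                # print x,       ->  print(x, end=' ')
    print(9000, "bytes")                            #   (trailing comma suppressed the newline)
    print("/* string literals */", file=fp)         # print >>fp, x  ->  print(x, file=fp)
    print(file=fp)                                  # print >>fp     ->  print(file=fp)
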
diff --git a/Tools/unicode/makeunicodedata.py b/Tools/unicode/makeunicodedata.py
index 673f713..0aabdf7 100644
--- a/Tools/unicode/makeunicodedata.py
+++ b/Tools/unicode/makeunicodedata.py
@@ -60,21 +60,21 @@ UPPER_MASK = 0x80
def maketables(trace=0):
- print "--- Reading", UNICODE_DATA % "", "..."
+ print("--- Reading", UNICODE_DATA % "", "...")
version = ""
unicode = UnicodeData(UNICODE_DATA % version,
COMPOSITION_EXCLUSIONS % version,
EASTASIAN_WIDTH % version)
- print len(filter(None, unicode.table)), "characters"
+ print(len(list(filter(None, unicode.table))), "characters")
for version in old_versions:
- print "--- Reading", UNICODE_DATA % ("-"+version), "..."
+ print("--- Reading", UNICODE_DATA % ("-"+version), "...")
old_unicode = UnicodeData(UNICODE_DATA % ("-"+version),
COMPOSITION_EXCLUSIONS % ("-"+version),
EASTASIAN_WIDTH % ("-"+version))
- print len(filter(None, old_unicode.table)), "characters"
+ print(len(list(filter(None, old_unicode.table))), "characters")
merge_old_version(version, unicode, old_unicode)
makeunicodename(unicode, trace)
@@ -93,7 +93,7 @@ def makeunicodedata(unicode, trace):
FILE = "Modules/unicodedata_db.h"
- print "--- Preparing", FILE, "..."
+ print("--- Preparing", FILE, "...")
# 1) database properties
@@ -203,93 +203,92 @@ def makeunicodedata(unicode, trace):
l = comp_last[l]
comp_data[f*total_last+l] = char
- print len(table), "unique properties"
- print len(decomp_prefix), "unique decomposition prefixes"
- print len(decomp_data), "unique decomposition entries:",
- print decomp_size, "bytes"
- print total_first, "first characters in NFC"
- print total_last, "last characters in NFC"
- print len(comp_pairs), "NFC pairs"
+ print(len(table), "unique properties")
+ print(len(decomp_prefix), "unique decomposition prefixes")
+ print(len(decomp_data), "unique decomposition entries:", end=' ')
+ print(decomp_size, "bytes")
+ print(total_first, "first characters in NFC")
+ print(total_last, "last characters in NFC")
+ print(len(comp_pairs), "NFC pairs")
- print "--- Writing", FILE, "..."
+ print("--- Writing", FILE, "...")
fp = open(FILE, "w")
- print >>fp, "/* this file was generated by %s %s */" % (SCRIPT, VERSION)
- print >>fp
- print >>fp, '#define UNIDATA_VERSION "%s"' % UNIDATA_VERSION
- print >>fp, "/* a list of unique database records */"
- print >>fp, \
- "const _PyUnicode_DatabaseRecord _PyUnicode_Database_Records[] = {"
+ print("/* this file was generated by %s %s */" % (SCRIPT, VERSION), file=fp)
+ print(file=fp)
+ print('#define UNIDATA_VERSION "%s"' % UNIDATA_VERSION, file=fp)
+ print("/* a list of unique database records */", file=fp)
+ print("const _PyUnicode_DatabaseRecord _PyUnicode_Database_Records[] = {", file=fp)
for item in table:
- print >>fp, " {%d, %d, %d, %d, %d}," % item
- print >>fp, "};"
- print >>fp
-
- print >>fp, "/* Reindexing of NFC first characters. */"
- print >>fp, "#define TOTAL_FIRST",total_first
- print >>fp, "#define TOTAL_LAST",total_last
- print >>fp, "struct reindex{int start;short count,index;};"
- print >>fp, "struct reindex nfc_first[] = {"
+ print(" {%d, %d, %d, %d, %d}," % item, file=fp)
+ print("};", file=fp)
+ print(file=fp)
+
+ print("/* Reindexing of NFC first characters. */", file=fp)
+ print("#define TOTAL_FIRST",total_first, file=fp)
+ print("#define TOTAL_LAST",total_last, file=fp)
+ print("struct reindex{int start;short count,index;};", file=fp)
+ print("struct reindex nfc_first[] = {", file=fp)
for start,end in comp_first_ranges:
- print >>fp," { %d, %d, %d}," % (start,end-start,comp_first[start])
- print >>fp," {0,0,0}"
- print >>fp,"};\n"
- print >>fp, "struct reindex nfc_last[] = {"
+ print(" { %d, %d, %d}," % (start,end-start,comp_first[start]), file=fp)
+ print(" {0,0,0}", file=fp)
+ print("};\n", file=fp)
+ print("struct reindex nfc_last[] = {", file=fp)
for start,end in comp_last_ranges:
- print >>fp," { %d, %d, %d}," % (start,end-start,comp_last[start])
- print >>fp," {0,0,0}"
- print >>fp,"};\n"
+ print(" { %d, %d, %d}," % (start,end-start,comp_last[start]), file=fp)
+ print(" {0,0,0}", file=fp)
+ print("};\n", file=fp)
# FIXME: <fl> the following tables could be made static, and
# the support code moved into unicodedatabase.c
- print >>fp, "/* string literals */"
- print >>fp, "const char *_PyUnicode_CategoryNames[] = {"
+ print("/* string literals */", file=fp)
+ print("const char *_PyUnicode_CategoryNames[] = {", file=fp)
for name in CATEGORY_NAMES:
- print >>fp, " \"%s\"," % name
- print >>fp, " NULL"
- print >>fp, "};"
+ print(" \"%s\"," % name, file=fp)
+ print(" NULL", file=fp)
+ print("};", file=fp)
- print >>fp, "const char *_PyUnicode_BidirectionalNames[] = {"
+ print("const char *_PyUnicode_BidirectionalNames[] = {", file=fp)
for name in BIDIRECTIONAL_NAMES:
- print >>fp, " \"%s\"," % name
- print >>fp, " NULL"
- print >>fp, "};"
+ print(" \"%s\"," % name, file=fp)
+ print(" NULL", file=fp)
+ print("};", file=fp)
- print >>fp, "const char *_PyUnicode_EastAsianWidthNames[] = {"
+ print("const char *_PyUnicode_EastAsianWidthNames[] = {", file=fp)
for name in EASTASIANWIDTH_NAMES:
- print >>fp, " \"%s\"," % name
- print >>fp, " NULL"
- print >>fp, "};"
+ print(" \"%s\"," % name, file=fp)
+ print(" NULL", file=fp)
+ print("};", file=fp)
- print >>fp, "static const char *decomp_prefix[] = {"
+ print("static const char *decomp_prefix[] = {", file=fp)
for name in decomp_prefix:
- print >>fp, " \"%s\"," % name
- print >>fp, " NULL"
- print >>fp, "};"
+ print(" \"%s\"," % name, file=fp)
+ print(" NULL", file=fp)
+ print("};", file=fp)
# split record index table
index1, index2, shift = splitbins(index, trace)
- print >>fp, "/* index tables for the database records */"
- print >>fp, "#define SHIFT", shift
+ print("/* index tables for the database records */", file=fp)
+ print("#define SHIFT", shift, file=fp)
Array("index1", index1).dump(fp, trace)
Array("index2", index2).dump(fp, trace)
# split decomposition index table
index1, index2, shift = splitbins(decomp_index, trace)
- print >>fp, "/* decomposition data */"
+ print("/* decomposition data */", file=fp)
Array("decomp_data", decomp_data).dump(fp, trace)
- print >>fp, "/* index tables for the decomposition data */"
- print >>fp, "#define DECOMP_SHIFT", shift
+ print("/* index tables for the decomposition data */", file=fp)
+ print("#define DECOMP_SHIFT", shift, file=fp)
Array("decomp_index1", index1).dump(fp, trace)
Array("decomp_index2", index2).dump(fp, trace)
index, index2, shift = splitbins(comp_data, trace)
- print >>fp, "/* NFC pairs */"
- print >>fp, "#define COMP_SHIFT", shift
+ print("/* NFC pairs */", file=fp)
+ print("#define COMP_SHIFT", shift, file=fp)
Array("comp_index", index).dump(fp, trace)
Array("comp_data", index2).dump(fp, trace)
@@ -306,30 +305,30 @@ def makeunicodedata(unicode, trace):
index[i] = cache[record] = len(records)
records.append(record)
index1, index2, shift = splitbins(index, trace)
- print >>fp, "static const change_record change_records_%s[] = {" % cversion
+ print("static const change_record change_records_%s[] = {" % cversion, file=fp)
for record in records:
- print >>fp, "\t{ %s }," % ", ".join(map(str,record))
- print >>fp, "};"
+ print("\t{ %s }," % ", ".join(map(str,record)), file=fp)
+ print("};", file=fp)
Array("changes_%s_index" % cversion, index1).dump(fp, trace)
Array("changes_%s_data" % cversion, index2).dump(fp, trace)
- print >>fp, "static const change_record* get_change_%s(Py_UCS4 n)" % cversion
- print >>fp, "{"
- print >>fp, "\tint index;"
- print >>fp, "\tif (n >= 0x110000) index = 0;"
- print >>fp, "\telse {"
- print >>fp, "\t\tindex = changes_%s_index[n>>%d];" % (cversion, shift)
- print >>fp, "\t\tindex = changes_%s_data[(index<<%d)+(n & %d)];" % \
- (cversion, shift, ((1<<shift)-1))
- print >>fp, "\t}"
- print >>fp, "\treturn change_records_%s+index;" % cversion
- print >>fp, "}\n"
- print >>fp, "static Py_UCS4 normalization_%s(Py_UCS4 n)" % cversion
- print >>fp, "{"
- print >>fp, "\tswitch(n) {"
+ print("static const change_record* get_change_%s(Py_UCS4 n)" % cversion, file=fp)
+ print("{", file=fp)
+ print("\tint index;", file=fp)
+ print("\tif (n >= 0x110000) index = 0;", file=fp)
+ print("\telse {", file=fp)
+ print("\t\tindex = changes_%s_index[n>>%d];" % (cversion, shift), file=fp)
+ print("\t\tindex = changes_%s_data[(index<<%d)+(n & %d)];" % \
+ (cversion, shift, ((1<<shift)-1)), file=fp)
+ print("\t}", file=fp)
+ print("\treturn change_records_%s+index;" % cversion, file=fp)
+ print("}\n", file=fp)
+ print("static Py_UCS4 normalization_%s(Py_UCS4 n)" % cversion, file=fp)
+ print("{", file=fp)
+ print("\tswitch(n) {", file=fp)
for k, v in normalization:
- print >>fp, "\tcase %s: return 0x%s;" % (hex(k), v)
- print >>fp, "\tdefault: return 0;"
- print >>fp, "\t}\n}\n"
+ print("\tcase %s: return 0x%s;" % (hex(k), v), file=fp)
+ print("\tdefault: return 0;", file=fp)
+ print("\t}\n}\n", file=fp)
fp.close()
@@ -340,7 +339,7 @@ def makeunicodetype(unicode, trace):
FILE = "Objects/unicodetype_db.h"
- print "--- Preparing", FILE, "..."
+ print("--- Preparing", FILE, "...")
# extract unicode types
dummy = (0, 0, 0, 0, 0, 0)
@@ -405,25 +404,25 @@ def makeunicodetype(unicode, trace):
table.append(item)
index[char] = i
- print len(table), "unique character type entries"
+ print(len(table), "unique character type entries")
- print "--- Writing", FILE, "..."
+ print("--- Writing", FILE, "...")
fp = open(FILE, "w")
- print >>fp, "/* this file was generated by %s %s */" % (SCRIPT, VERSION)
- print >>fp
- print >>fp, "/* a list of unique character type descriptors */"
- print >>fp, "const _PyUnicode_TypeRecord _PyUnicode_TypeRecords[] = {"
+ print("/* this file was generated by %s %s */" % (SCRIPT, VERSION), file=fp)
+ print(file=fp)
+ print("/* a list of unique character type descriptors */", file=fp)
+ print("const _PyUnicode_TypeRecord _PyUnicode_TypeRecords[] = {", file=fp)
for item in table:
- print >>fp, " {%d, %d, %d, %d, %d, %d}," % item
- print >>fp, "};"
- print >>fp
+ print(" {%d, %d, %d, %d, %d, %d}," % item, file=fp)
+ print("};", file=fp)
+ print(file=fp)
# split decomposition index table
index1, index2, shift = splitbins(index, trace)
- print >>fp, "/* type indexes */"
- print >>fp, "#define SHIFT", shift
+ print("/* type indexes */", file=fp)
+ print("#define SHIFT", shift, file=fp)
Array("index1", index1).dump(fp, trace)
Array("index2", index2).dump(fp, trace)
@@ -436,7 +435,7 @@ def makeunicodename(unicode, trace):
FILE = "Modules/unicodename_db.h"
- print "--- Preparing", FILE, "..."
+ print("--- Preparing", FILE, "...")
# collect names
names = [None] * len(unicode.chars)
@@ -448,7 +447,7 @@ def makeunicodename(unicode, trace):
if name and name[0] != "<":
names[char] = name + chr(0)
- print len(filter(lambda n: n is not None, names)), "distinct names"
+ print(len(list(filter(lambda n: n is not None, names))), "distinct names")
# collect unique words from names (note that we differ between
# words inside a sentence, and words ending a sentence. the
@@ -469,7 +468,7 @@ def makeunicodename(unicode, trace):
else:
words[w] = [len(words)]
- print n, "words in text;", b, "bytes"
+ print(n, "words in text;", b, "bytes")
wordlist = words.items()
@@ -485,19 +484,19 @@ def makeunicodename(unicode, trace):
escapes = 0
while escapes * 256 < len(wordlist):
escapes = escapes + 1
- print escapes, "escapes"
+ print(escapes, "escapes")
short = 256 - escapes
assert short > 0
- print short, "short indexes in lexicon"
+ print(short, "short indexes in lexicon")
# statistics
n = 0
for i in range(short):
n = n + len(wordlist[i][1])
- print n, "short indexes in phrasebook"
+ print(n, "short indexes in phrasebook")
# pick the most commonly used words, and sort the rest on falling
# length (to maximize overlap)
@@ -566,29 +565,29 @@ def makeunicodename(unicode, trace):
codehash = Hash("code", data, 47)
- print "--- Writing", FILE, "..."
+ print("--- Writing", FILE, "...")
fp = open(FILE, "w")
- print >>fp, "/* this file was generated by %s %s */" % (SCRIPT, VERSION)
- print >>fp
- print >>fp, "#define NAME_MAXLEN", 256
- print >>fp
- print >>fp, "/* lexicon */"
+ print("/* this file was generated by %s %s */" % (SCRIPT, VERSION), file=fp)
+ print(file=fp)
+ print("#define NAME_MAXLEN", 256, file=fp)
+ print(file=fp)
+ print("/* lexicon */", file=fp)
Array("lexicon", lexicon).dump(fp, trace)
Array("lexicon_offset", lexicon_offset).dump(fp, trace)
# split decomposition index table
offset1, offset2, shift = splitbins(phrasebook_offset, trace)
- print >>fp, "/* code->name phrasebook */"
- print >>fp, "#define phrasebook_shift", shift
- print >>fp, "#define phrasebook_short", short
+ print("/* code->name phrasebook */", file=fp)
+ print("#define phrasebook_shift", shift, file=fp)
+ print("#define phrasebook_short", short, file=fp)
Array("phrasebook", phrasebook).dump(fp, trace)
Array("phrasebook_offset1", offset1).dump(fp, trace)
Array("phrasebook_offset2", offset2).dump(fp, trace)
- print >>fp, "/* name->code dictionary */"
+ print("/* name->code dictionary */", file=fp)
codehash.dump(fp, trace)
fp.close()
@@ -781,7 +780,7 @@ class Hash:
else:
raise AssertionError, "ran out of polynominals"
- print size, "slots in hash table"
+ print(size, "slots in hash table")
table = [None] * size
@@ -813,7 +812,7 @@ class Hash:
if incr > mask:
incr = incr ^ poly
- print n, "collisions"
+ print(n, "collisions")
self.collisions = n
for i in range(len(table)):
@@ -845,7 +844,7 @@ class Array:
# write data to file, as a C array
size = getsize(self.data)
if trace:
- print >>sys.stderr, self.name+":", size*len(self.data), "bytes"
+ print(self.name+":", size*len(self.data), "bytes", file=sys.stderr)
file.write("static ")
if size == 1:
file.write("unsigned char")
@@ -895,10 +894,10 @@ def splitbins(t, trace=0):
import sys
if trace:
def dump(t1, t2, shift, bytes):
- print >>sys.stderr, "%d+%d bins at shift %d; %d bytes" % (
- len(t1), len(t2), shift, bytes)
- print >>sys.stderr, "Size of original table:", len(t)*getsize(t), \
- "bytes"
+ print("%d+%d bins at shift %d; %d bytes" % (
+ len(t1), len(t2), shift, bytes), file=sys.stderr)
+ print("Size of original table:", len(t)*getsize(t), \
+ "bytes", file=sys.stderr)
n = len(t)-1 # last valid index
maxshift = 0 # the most we can shift n and still have something left
if n > 0:
@@ -930,7 +929,7 @@ def splitbins(t, trace=0):
bytes = b
t1, t2, shift = best
if trace:
- print >>sys.stderr, "Best:",
+ print("Best:", end=' ', file=sys.stderr)
dump(t1, t2, shift, bytes)
if __debug__:
# exhaustively verify that the decomposition is correct
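
The splitbins() helper exercised in the final hunks packs a large flat table t into two smaller arrays plus a shift, and the generated C above (the get_change_* functions) reads an entry back with the same two-step index. A minimal sketch of that invariant, following splitbins' documented contract (names are illustrative):

    def lookup(t1, t2, shift, i):
        # Mirrors the emitted C:
        #   index = changes_X_index[n >> SHIFT];
        #   value = changes_X_data[(index << SHIFT) + (n & ((1 << SHIFT) - 1))];
        mask = (1 << shift) - 1
        return t2[(t1[i >> shift] << shift) + (i & mask)]

    # For every valid i, lookup(t1, t2, shift, i) == t[i] where
    # t1, t2, shift = splitbins(t); the __debug__ branch above checks
    # this exhaustively after each split.
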