diff options
author | Fredrik Lundh <fredrik@pythonware.com> | 2000-09-25 21:01:56 (GMT) |
---|---|---|
committer | Fredrik Lundh <fredrik@pythonware.com> | 2000-09-25 21:01:56 (GMT) |
commit | 0f8fad4969630cde85d84059fb85f4756f000c46 (patch) | |
tree | d5426dffc69fe179e152659ef044b0077a5e5734 /Tools/unicode | |
parent | 858346e4847475137d2e8eaf5e76bfe6deacedb1 (diff) | |
download | cpython-0f8fad4969630cde85d84059fb85f4756f000c46.zip cpython-0f8fad4969630cde85d84059fb85f4756f000c46.tar.gz cpython-0f8fad4969630cde85d84059fb85f4756f000c46.tar.bz2 |
unicode database compression, step 3:
- added decimal digit and digit properties to the unidb tables
Diffstat (limited to 'Tools/unicode')
-rw-r--r-- | Tools/unicode/makeunicodedata.py | 23 |
1 files changed, 19 insertions, 4 deletions
```diff
diff --git a/Tools/unicode/makeunicodedata.py b/Tools/unicode/makeunicodedata.py
index b8411ad..c3f44a0 100644
--- a/Tools/unicode/makeunicodedata.py
+++ b/Tools/unicode/makeunicodedata.py
@@ -8,6 +8,7 @@
 # 2000-09-24 fl created (based on bits and pieces from unidb)
 # 2000-09-25 fl merged tim's splitbin fixes, separate decomposition table
 # 2000-09-25 fl added character type table
+# 2000-09-26 fl added LINEBREAK flags
 #
 # written by Fredrik Lundh (fredrik@pythonware.com), September 2000
 #
@@ -28,11 +29,12 @@ BIDIRECTIONAL_NAMES = [ "", "L", "LRE", "LRO", "R", "AL", "RLE", "RLO",
     "PDF", "EN", "ES", "ET", "AN", "CS", "NSM", "BN", "B", "S", "WS",
     "ON" ]
 
+# note: should match definitions in Objects/unicodectype.c
 ALPHA_MASK = 0x01
 DECIMAL_MASK = 0x02
 DIGIT_MASK = 0x04
 LOWER_MASK = 0x08
-NUMERIC_MASK = 0x10
+LINEBREAK_MASK = 0x10
 SPACE_MASK = 0x20
 TITLE_MASK = 0x40
 UPPER_MASK = 0x80
@@ -144,7 +146,7 @@ def maketables():
     # 3) unicode type data
 
     # extract unicode types
-    dummy = (0, 0, 0, 0)
+    dummy = (0, 0, 0, 0, 0, 0)
     table = [dummy]
     cache = {0: dummy}
     index = [0] * len(unicode.chars)
@@ -160,6 +162,8 @@ def maketables():
                 flags |= ALPHA_MASK
             if category == "Ll":
                 flags |= LOWER_MASK
+            if category == "Zl" or bidirectional == "B":
+                flags |= LINEBREAK_MASK
             if category == "Zs" or bidirectional in ("WS", "B", "S"):
                 flags |= SPACE_MASK
             if category in ["Lt", "Lu"]:
@@ -179,8 +183,17 @@ def maketables():
                 title = (int(record[14], 16) - char) & 0xffff
             else:
                 title = 0
+            # decimal digit, integer digit
+            decimal = 0
+            if record[6]:
+                flags |= DECIMAL_MASK
+                decimal = int(record[6])
+            digit = 0
+            if record[7]:
+                flags |= DIGIT_MASK
+                digit = int(record[7])
             item = (
-                flags, upper, lower, title
+                flags, upper, lower, title, decimal, digit
                 )
             # add entry to index and item tables
             i = cache.get(item)
@@ -189,6 +202,8 @@ def maketables():
                 table.append(item)
         index[char] = i
 
+    print len(table), "ctype entries"
+
     FILE = "Objects/unicodetype_db.h"
 
     sys.stdout = open(FILE, "w")
@@ -198,7 +213,7 @@ def maketables():
     print "/* a list of unique character type descriptors */"
     print "const _PyUnicode_TypeRecord _PyUnicode_TypeRecords[] = {"
     for item in table:
-        print " {%d, %d, %d, %d}," % item
+        print " {%d, %d, %d, %d, %d, %d}," % item
     print "};"
     print
```