summary | refs | log | tree | commit | diff | stats
path: root/Tools/unicode
diff options
context:
space:
mode:
author Fredrik Lundh <fredrik@pythonware.com> 2000-09-25 21:01:56 (GMT)
committer Fredrik Lundh <fredrik@pythonware.com> 2000-09-25 21:01:56 (GMT)
commit 0f8fad4969630cde85d84059fb85f4756f000c46 (patch)
tree d5426dffc69fe179e152659ef044b0077a5e5734 /Tools/unicode
parent 858346e4847475137d2e8eaf5e76bfe6deacedb1 (diff)
download cpython-0f8fad4969630cde85d84059fb85f4756f000c46.zip
cpython-0f8fad4969630cde85d84059fb85f4756f000c46.tar.gz
cpython-0f8fad4969630cde85d84059fb85f4756f000c46.tar.bz2
unicode database compression, step 3:
- added decimal digit and digit properties to the unidb tables
Diffstat (limited to 'Tools/unicode')
-rw-r--r-- Tools/unicode/makeunicodedata.py 23
1 files changed, 19 insertions, 4 deletions
diff --git a/Tools/unicode/makeunicodedata.py b/Tools/unicode/makeunicodedata.py
index b8411ad..c3f44a0 100644
--- a/Tools/unicode/makeunicodedata.py
+++ b/Tools/unicode/makeunicodedata.py
@@ -8,6 +8,7 @@
# 2000-09-24 fl created (based on bits and pieces from unidb)
# 2000-09-25 fl merged tim's splitbin fixes, separate decomposition table
# 2000-09-25 fl added character type table
+# 2000-09-26 fl added LINEBREAK flags
#
# written by Fredrik Lundh (fredrik@pythonware.com), September 2000
#
@@ -28,11 +29,12 @@ BIDIRECTIONAL_NAMES = [ "", "L", "LRE", "LRO", "R", "AL", "RLE", "RLO",
"PDF", "EN", "ES", "ET", "AN", "CS", "NSM", "BN", "B", "S", "WS",
"ON" ]
+# note: should match definitions in Objects/unicodectype.c
ALPHA_MASK = 0x01
DECIMAL_MASK = 0x02
DIGIT_MASK = 0x04
LOWER_MASK = 0x08
-NUMERIC_MASK = 0x10
+LINEBREAK_MASK = 0x10
SPACE_MASK = 0x20
TITLE_MASK = 0x40
UPPER_MASK = 0x80
@@ -144,7 +146,7 @@ def maketables():
# 3) unicode type data
# extract unicode types
- dummy = (0, 0, 0, 0)
+ dummy = (0, 0, 0, 0, 0, 0)
table = [dummy]
cache = {0: dummy}
index = [0] * len(unicode.chars)
@@ -160,6 +162,8 @@ def maketables():
flags |= ALPHA_MASK
if category == "Ll":
flags |= LOWER_MASK
+ if category == "Zl" or bidirectional == "B":
+ flags |= LINEBREAK_MASK
if category == "Zs" or bidirectional in ("WS", "B", "S"):
flags |= SPACE_MASK
if category in ["Lt", "Lu"]:
@@ -179,8 +183,17 @@ def maketables():
title = (int(record[14], 16) - char) & 0xffff
else:
title = 0
+ # decimal digit, integer digit
+ decimal = 0
+ if record[6]:
+ flags |= DECIMAL_MASK
+ decimal = int(record[6])
+ digit = 0
+ if record[7]:
+ flags |= DIGIT_MASK
+ digit = int(record[7])
item = (
- flags, upper, lower, title
+ flags, upper, lower, title, decimal, digit
)
# add entry to index and item tables
i = cache.get(item)
@@ -189,6 +202,8 @@ def maketables():
table.append(item)
index[char] = i
+ print len(table), "ctype entries"
+
FILE = "Objects/unicodetype_db.h"
sys.stdout = open(FILE, "w")
@@ -198,7 +213,7 @@ def maketables():
print "/* a list of unique character type descriptors */"
print "const _PyUnicode_TypeRecord _PyUnicode_TypeRecords[] = {"
for item in table:
- print " {%d, %d, %d, %d}," % item
+ print " {%d, %d, %d, %d, %d, %d}," % item
print "};"
print