summary | refs | log | tree | commit | diff | stats
path: root/Tools
diff options
context:
space:
mode:
author: Martin v. Löwis <martin@v.loewis.de>, 2010-11-22 09:00:02 (GMT)
committer: Martin v. Löwis <martin@v.loewis.de>, 2010-11-22 09:00:02 (GMT)
commit: 5cbc71e50ab6fa8c3cb0cfc64ca4bf5fb174ffcc (patch)
tree: 715424b42ec0bcc92caf84ea98425efc6b45347e /Tools
parent: 249d7e3c2eb50f42295a4bf2e7d58a1e8af89c4c (diff)
download: cpython-5cbc71e50ab6fa8c3cb0cfc64ca4bf5fb174ffcc.zip
cpython-5cbc71e50ab6fa8c3cb0cfc64ca4bf5fb174ffcc.tar.gz
cpython-5cbc71e50ab6fa8c3cb0cfc64ca4bf5fb174ffcc.tar.bz2
Issue #10459: Update CJK character names to Unicode 6.0.
Diffstat (limited to 'Tools')
-rw-r--r-- Tools/unicode/makeunicodedata.py | 21
1 files changed, 19 insertions, 2 deletions
diff --git a/Tools/unicode/makeunicodedata.py b/Tools/unicode/makeunicodedata.py
index 0783f17..02b6892 100644
--- a/Tools/unicode/makeunicodedata.py
+++ b/Tools/unicode/makeunicodedata.py
@@ -70,6 +70,15 @@ PRINTABLE_MASK = 0x400
NODELTA_MASK = 0x800
NUMERIC_MASK = 0x1000
+# these ranges need to match unicodedata.c:is_unified_ideograph
+cjk_ranges = [
+ ('3400', '4DB5'),
+ ('4E00', '9FCB'),
+ ('20000', '2A6D6'),
+ ('2A700', '2B734'),
+ ('2B740', '2B81D')
+]
+
def maketables(trace=0):
print("--- Reading", UNICODE_DATA % "", "...")
@@ -81,7 +90,7 @@ def maketables(trace=0):
for version in old_versions:
print("--- Reading", UNICODE_DATA % ("-"+version), "...")
- old_unicode = UnicodeData(version)
+ old_unicode = UnicodeData(version, cjk_check=False)
print(len(list(filter(None, old_unicode.table))), "characters")
merge_old_version(version, unicode, old_unicode)
@@ -804,7 +813,8 @@ class UnicodeData:
def __init__(self, version,
linebreakprops=False,
- expand=1):
+ expand=1,
+ cjk_check=True):
self.changed = []
file = open_data(UNICODE_DATA, version)
table = [None] * 0x110000
@@ -816,6 +826,8 @@ class UnicodeData:
char = int(s[0], 16)
table[char] = s
+ cjk_ranges_found = []
+
# expand first-last ranges
if expand:
field = None
@@ -826,12 +838,17 @@ class UnicodeData:
s[1] = ""
field = s
elif s[1][-5:] == "Last>":
+ if s[1].startswith("<CJK Ideograph"):
+ cjk_ranges_found.append((field[0],
+ s[0]))
s[1] = ""
field = None
elif field:
f2 = field[:]
f2[0] = "%X" % i
table[i] = f2
+ if cjk_check and cjk_ranges != cjk_ranges_found:
+ raise ValueError("CJK ranges deviate: have %r" % cjk_ranges_found)
# public attributes
self.filename = UNICODE_DATA % ''