update to Unicode 6.1

author: Benjamin Peterson <benjamin@python.org> 2012-02-21 03:24:29 (GMT)
committer: Benjamin Peterson <benjamin@python.org> 2012-02-21 03:24:29 (GMT)
commit: 71f660e00f1d4de04a6a2d4430f0cadb71edf115 (patch)
tree: 44ef21d5710d9492bd8bc302050242249118ce4c /Tools/unicode
parent: 16fa2a10970a92cba1ee1249ed7bcf7d2c131051 (diff)
download: cpython-71f660e00f1d4de04a6a2d4430f0cadb71edf115.zip
cpython-71f660e00f1d4de04a6a2d4430f0cadb71edf115.tar.gz
cpython-71f660e00f1d4de04a6a2d4430f0cadb71edf115.tar.bz2
1 files changed, 5 insertions, 4 deletions
diff --git a/Tools/unicode/makeunicodedata.py b/Tools/unicode/makeunicodedata.py
index 17edc3c..db0f8ec 100644
--- a/Tools/unicode/makeunicodedata.py
+++ b/Tools/unicode/makeunicodedata.py
@@ -38,7 +38,7 @@ SCRIPT = sys.argv[0]
 VERSION = "3.2"
 
 # The Unicode Database
-UNIDATA_VERSION = "6.0.0"
+UNIDATA_VERSION = "6.1.0"
 UNICODE_DATA = "UnicodeData%s.txt"
 COMPOSITION_EXCLUSIONS = "CompositionExclusions%s.txt"
 EASTASIAN_WIDTH = "EastAsianWidth%s.txt"
@@ -58,7 +58,7 @@ PUA_16 = range(0x100000, 0x10FFFE)
 
 # we use these ranges of PUA_15 to store name aliases and named sequences
 NAME_ALIASES_START = 0xF0000
-NAMED_SEQUENCES_START = 0xF0100
+NAMED_SEQUENCES_START = 0xF0200
 
 old_versions = ["3.2.0"]
 
@@ -95,7 +95,7 @@ EXTENDED_CASE_MASK = 0x4000
 # these ranges need to match unicodedata.c:is_unified_ideograph
 cjk_ranges = [
     ('3400', '4DB5'),
-    ('4E00', '9FCB'),
+    ('4E00', '9FCC'),
     ('20000', '2A6D6'),
     ('2A700', '2B734'),
     ('2B740', '2B81D')
@@ -958,7 +958,7 @@ class UnicodeData:
                     s = s.strip()
                     if not s or s.startswith('#'):
                         continue
-                    char, name = s.split(';')
+                    char, name, abbrev = s.split(';')
                     char = int(char, 16)
                     self.aliases.append((name, char))
                     # also store the name in the PUA 1
@@ -971,6 +971,7 @@ class UnicodeData:
             # in order to take advantage of the compression and lookup
             # algorithms used for the other characters.
 
+            assert pua_index < NAMED_SEQUENCES_START
             pua_index = NAMED_SEQUENCES_START
             with open_data(NAMED_SEQUENCES, version) as file:
                 for s in file:
author	Benjamin Peterson <benjamin@python.org>	2012-02-21 03:24:29 (GMT)
committer	Benjamin Peterson <benjamin@python.org>	2012-02-21 03:24:29 (GMT)
commit	71f660e00f1d4de04a6a2d4430f0cadb71edf115 (patch)
tree	44ef21d5710d9492bd8bc302050242249118ce4c /Tools/unicode
parent	16fa2a10970a92cba1ee1249ed7bcf7d2c131051 (diff)
download	cpython-71f660e00f1d4de04a6a2d4430f0cadb71edf115.zip cpython-71f660e00f1d4de04a6a2d4430f0cadb71edf115.tar.gz cpython-71f660e00f1d4de04a6a2d4430f0cadb71edf115.tar.bz2