Implement names for CJK unified ideographs. Add name to KeyError output.

Verify that the lookup for an existing name succeeds.
author: Martin v. Löwis <martin@v.loewis.de> 2002-11-23 18:01:32 (GMT)
committer: Martin v. Löwis <martin@v.loewis.de> 2002-11-23 18:01:32 (GMT)
commit: ef7fe2e8137824248cf45d316535b76dae302b5a (patch)
tree: 01a0b67fae5af758f8c8cc18459266756f4e1074 /Lib
parent: 8579efc86c18b7b824ec080582f032674e2f8a5e (diff)
download: cpython-ef7fe2e8137824248cf45d316535b76dae302b5a.zip cpython-ef7fe2e8137824248cf45d316535b76dae302b5a.tar.gz cpython-ef7fe2e8137824248cf45d316535b76dae302b5a.tar.bz2
2 files changed, 19 insertions, 6 deletions
diff --git a/Lib/test/output/test_ucn b/Lib/test/output/test_ucn
index 1006c07..c41017b 100644
--- a/Lib/test/output/test_ucn
+++ b/Lib/test/output/test_ucn
@@ -2,7 +2,8 @@ test_ucn
 Testing General Unicode Character Name, and case insensitivity... done.
 Testing name to code mapping.... done.
 Testing hangul syllable names.... done.
-Testing code to name mapping for all characters.... done.
-Found 22728 characters in the unicode name database
+Testing names of CJK unified ideographs.... done.
+Testing code to name mapping for all BMP characters.... done.
+Found 50212 characters in the unicode name database
 Testing misc. symbols for unicode character name expansion.... done.
 Testing unicode character name expansion strict error handling.... done.
diff --git a/Lib/test/test_ucn.py b/Lib/test/test_ucn.py
index 6f2b022..e7b8bbd 100644
--- a/Lib/test/test_ucn.py
+++ b/Lib/test/test_ucn.py
@@ -80,16 +80,28 @@ else:
     raise AssertionError, "Found name for U+D7A4"
 print "done."
 
-print "Testing code to name mapping for all characters....",
+print "Testing names of CJK unified ideographs....",
+exec r"""
+verify(u"\N{CJK UNIFIED IDEOGRAPH-3400}" == u"\u3400")
+verify(u"\N{CJK UNIFIED IDEOGRAPH-4DB5}" == u"\u4db5")
+verify(u"\N{CJK UNIFIED IDEOGRAPH-4E00}" == u"\u4e00")
+verify(u"\N{CJK UNIFIED IDEOGRAPH-9FA5}" == u"\u9fa5")
+verify(u"\N{CJK UNIFIED IDEOGRAPH-20000}" == u"\U00020000")
+verify(u"\N{CJK UNIFIED IDEOGRAPH-2A6D6}" == u"\U0002a6d6")
+"""
+print "done."
+
+print "Testing code to name mapping for all BMP characters....",
 count = 0
-for code in range(65536):
+for code in range(0x10000):
     try:
         char = unichr(code)
         name = unicodedata.name(char)
-        verify(unicodedata.lookup(name) == char)
-        count += 1
     except (KeyError, ValueError):
         pass
+    else:
+        verify(unicodedata.lookup(name) == char)
+        count += 1
 print "done."
 
 print "Found", count, "characters in the unicode name database"
author	Martin v. Löwis <martin@v.loewis.de>	2002-11-23 18:01:32 (GMT)
committer	Martin v. Löwis <martin@v.loewis.de>	2002-11-23 18:01:32 (GMT)
commit	ef7fe2e8137824248cf45d316535b76dae302b5a (patch)
tree	01a0b67fae5af758f8c8cc18459266756f4e1074 /Lib
parent	8579efc86c18b7b824ec080582f032674e2f8a5e (diff)
download	cpython-ef7fe2e8137824248cf45d316535b76dae302b5a.zip cpython-ef7fe2e8137824248cf45d316535b76dae302b5a.tar.gz cpython-ef7fe2e8137824248cf45d316535b76dae302b5a.tar.bz2