summaryrefslogtreecommitdiffstats
path: root/Lib/test/test_ucn.py
blob: e7b8bbdea3597285ad9b6e6f163e848bfacfe49f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
""" Test script for the Unicode implementation.

Written by Bill Tutt.
Modified for Python 2.0 by Fredrik Lundh (fredrik@pythonware.com)

(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.

"""#"
from test.test_support import verify, verbose

print 'Testing General Unicode Character Name, and case insensitivity...',

# General and case insensitivity test:
try:
    # put all \N escapes inside exec'd raw strings, to make sure this
    # script runs even if the compiler chokes on \N escapes
    exec r"""
s = u"\N{LATIN CAPITAL LETTER T}" \
    u"\N{LATIN SMALL LETTER H}" \
    u"\N{LATIN SMALL LETTER E}" \
    u"\N{SPACE}" \
    u"\N{LATIN SMALL LETTER R}" \
    u"\N{LATIN CAPITAL LETTER E}" \
    u"\N{LATIN SMALL LETTER D}" \
    u"\N{SPACE}" \
    u"\N{LATIN SMALL LETTER f}" \
    u"\N{LATIN CAPITAL LeTtEr o}" \
    u"\N{LATIN SMaLl LETTER x}" \
    u"\N{SPACE}" \
    u"\N{LATIN SMALL LETTER A}" \
    u"\N{LATIN SMALL LETTER T}" \
    u"\N{LATIN SMALL LETTER E}" \
    u"\N{SPACE}" \
    u"\N{LATIN SMALL LETTER T}" \
    u"\N{LATIN SMALL LETTER H}" \
    u"\N{LATIN SMALL LETTER E}" \
    u"\N{SpAcE}" \
    u"\N{LATIN SMALL LETTER S}" \
    u"\N{LATIN SMALL LETTER H}" \
    u"\N{LATIN SMALL LETTER E}" \
    u"\N{LATIN SMALL LETTER E}" \
    u"\N{LATIN SMALL LETTER P}" \
    u"\N{FULL STOP}"
verify(s == u"The rEd fOx ate the sheep.", s)
"""
except UnicodeError, v:
    print v
print "done."

import unicodedata

print "Testing name to code mapping....",
for char in "SPAM":
    name = "LATIN SMALL LETTER %s" % char
    code = unicodedata.lookup(name)
    verify(unicodedata.name(code) == name)
print "done."

print "Testing hangul syllable names....",
exec r"""
verify(u"\N{HANGUL SYLLABLE GA}" == u"\uac00")
verify(u"\N{HANGUL SYLLABLE GGWEOSS}" == u"\uafe8")
verify(u"\N{HANGUL SYLLABLE DOLS}" == u"\ub3d0")
verify(u"\N{HANGUL SYLLABLE RYAN}" == u"\ub7b8")
verify(u"\N{HANGUL SYLLABLE MWIK}" == u"\ubba0")
verify(u"\N{HANGUL SYLLABLE BBWAEM}" == u"\ubf88")
verify(u"\N{HANGUL SYLLABLE SSEOL}" == u"\uc370")
verify(u"\N{HANGUL SYLLABLE YI}" == u"\uc758")
verify(u"\N{HANGUL SYLLABLE JJYOSS}" == u"\ucb40")
verify(u"\N{HANGUL SYLLABLE KYEOLS}" == u"\ucf28")
verify(u"\N{HANGUL SYLLABLE PAN}" == u"\ud310")
verify(u"\N{HANGUL SYLLABLE HWEOK}" == u"\ud6f8")
verify(u"\N{HANGUL SYLLABLE HIH}" == u"\ud7a3")
"""
try:
    unicodedata.name(u"\ud7a4")
except ValueError:
    pass
else:
    raise AssertionError, "Found name for U+D7A4"
print "done."

print "Testing names of CJK unified ideographs....",
exec r"""
verify(u"\N{CJK UNIFIED IDEOGRAPH-3400}" == u"\u3400")
verify(u"\N{CJK UNIFIED IDEOGRAPH-4DB5}" == u"\u4db5")
verify(u"\N{CJK UNIFIED IDEOGRAPH-4E00}" == u"\u4e00")
verify(u"\N{CJK UNIFIED IDEOGRAPH-9FA5}" == u"\u9fa5")
verify(u"\N{CJK UNIFIED IDEOGRAPH-20000}" == u"\U00020000")
verify(u"\N{CJK UNIFIED IDEOGRAPH-2A6D6}" == u"\U0002a6d6")
"""
print "done."

print "Testing code to name mapping for all BMP characters....",
count = 0
for code in range(0x10000):
    try:
        char = unichr(code)
        name = unicodedata.name(char)
    except (KeyError, ValueError):
        pass
    else:
        verify(unicodedata.lookup(name) == char)
        count += 1
print "done."

print "Found", count, "characters in the unicode name database"

# misc. symbol testing
print "Testing misc. symbols for unicode character name expansion....",
exec r"""
verify(u"\N{PILCROW SIGN}" == u"\u00b6")
verify(u"\N{REPLACEMENT CHARACTER}" == u"\uFFFD")
verify(u"\N{HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK}" == u"\uFF9F")
verify(u"\N{FULLWIDTH LATIN SMALL LETTER A}" == u"\uFF41")
"""
print "done."

# strict error testing:
print "Testing unicode character name expansion strict error handling....",

# (malformed input, complaint raised if decoding unexpectedly succeeds).
# The inputs are byte strings, so the \N sequences reach the
# 'unicode-escape' decoder untouched; raw literals make that explicit.
malformed_inputs = [
    (r"\N{blah}",
     "failed to raise an exception when given a bogus character name"),
    (r"\N{" + "x" * 100000 + "}",
     "failed to raise an exception when given a very "
     "long bogus character name"),
    (r"\N{SPACE",
     "failed to raise an exception for a missing closing brace."),
    (r"\NSPACE",
     "failed to raise an exception for a missing opening brace."),
]
for bad_input, complaint in malformed_inputs:
    try:
        unicode(bad_input, 'unicode-escape', 'strict')
    except UnicodeError:
        # Expected: strict decoding must reject the malformed escape.
        pass
    else:
        raise AssertionError(complaint)
print "done."