diff options
Diffstat (limited to 'Lib/test/test_ucn.py')
-rw-r--r-- | Lib/test/test_ucn.py | 88 |
1 files changed, 81 insertions, 7 deletions
diff --git a/Lib/test/test_ucn.py b/Lib/test/test_ucn.py index fd620f0..68a3219 100644 --- a/Lib/test/test_ucn.py +++ b/Lib/test/test_ucn.py @@ -8,8 +8,11 @@ Modified for Python 2.0 by Fredrik Lundh (fredrik@pythonware.com) """#" import unittest +import unicodedata from test import support +from http.client import HTTPException +from test.test_normalization import check_version class UnicodeNamesTest(unittest.TestCase): @@ -59,8 +62,6 @@ class UnicodeNamesTest(unittest.TestCase): ) def test_ascii_letters(self): - import unicodedata - for char in "".join(map(chr, range(ord("a"), ord("z")))): name = "LATIN SMALL LETTER %s" % char.upper() code = unicodedata.lookup(name) @@ -81,7 +82,6 @@ class UnicodeNamesTest(unittest.TestCase): self.checkletter("HANGUL SYLLABLE HWEOK", "\ud6f8") self.checkletter("HANGUL SYLLABLE HIH", "\ud7a3") - import unicodedata self.assertRaises(ValueError, unicodedata.name, "\ud7a4") def test_cjk_unified_ideographs(self): @@ -97,14 +97,11 @@ class UnicodeNamesTest(unittest.TestCase): self.checkletter("CJK UNIFIED IDEOGRAPH-2B81D", "\U0002B81D") def test_bmp_characters(self): - import unicodedata - count = 0 for code in range(0x10000): char = chr(code) name = unicodedata.name(char, None) if name is not None: self.assertEqual(unicodedata.lookup(name), char) - count += 1 def test_misc_symbols(self): self.checkletter("PILCROW SIGN", "\u00b6") @@ -112,8 +109,85 @@ class UnicodeNamesTest(unittest.TestCase): self.checkletter("HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK", "\uFF9F") self.checkletter("FULLWIDTH LATIN SMALL LETTER A", "\uFF41") + def test_aliases(self): + # Check that the aliases defined in the NameAliases.txt file work. + # This should be updated when new aliases are added or the file + # should be downloaded and parsed instead. See #12753. + aliases = [ + ('LATIN CAPITAL LETTER GHA', 0x01A2), + ('LATIN SMALL LETTER GHA', 0x01A3), + ('KANNADA LETTER LLLA', 0x0CDE), + ('LAO LETTER FO FON', 0x0E9D), + ('LAO LETTER FO FAY', 0x0E9F), + ('LAO LETTER RO', 0x0EA3), + ('LAO LETTER LO', 0x0EA5), + ('TIBETAN MARK BKA- SHOG GI MGO RGYAN', 0x0FD0), + ('YI SYLLABLE ITERATION MARK', 0xA015), + ('PRESENTATION FORM FOR VERTICAL RIGHT WHITE LENTICULAR BRACKET', 0xFE18), + ('BYZANTINE MUSICAL SYMBOL FTHORA SKLIRON CHROMA VASIS', 0x1D0C5) + ] + for alias, codepoint in aliases: + self.checkletter(alias, chr(codepoint)) + name = unicodedata.name(chr(codepoint)) + self.assertNotEqual(name, alias) + self.assertEqual(unicodedata.lookup(alias), + unicodedata.lookup(name)) + with self.assertRaises(KeyError): + unicodedata.ucd_3_2_0.lookup(alias) + + def test_aliases_names_in_pua_range(self): + # We are storing aliases in the PUA 15, but their names shouldn't leak + for cp in range(0xf0000, 0xf0100): + with self.assertRaises(ValueError) as cm: + unicodedata.name(chr(cp)) + self.assertEqual(str(cm.exception), 'no such name') + + def test_named_sequences_names_in_pua_range(self): + # We are storing named seq in the PUA 15, but their names shouldn't leak + for cp in range(0xf0100, 0xf0fff): + with self.assertRaises(ValueError) as cm: + unicodedata.name(chr(cp)) + self.assertEqual(str(cm.exception), 'no such name') + + def test_named_sequences_sample(self): + # Check a few named sequences. See #12753. + sequences = [ + ('LATIN SMALL LETTER R WITH TILDE', '\u0072\u0303'), + ('TAMIL SYLLABLE SAI', '\u0BB8\u0BC8'), + ('TAMIL SYLLABLE MOO', '\u0BAE\u0BCB'), + ('TAMIL SYLLABLE NNOO', '\u0BA3\u0BCB'), + ('TAMIL CONSONANT KSS', '\u0B95\u0BCD\u0BB7\u0BCD'), + ] + for seqname, codepoints in sequences: + self.assertEqual(unicodedata.lookup(seqname), codepoints) + with self.assertRaises(SyntaxError): + self.checkletter(seqname, None) + with self.assertRaises(KeyError): + unicodedata.ucd_3_2_0.lookup(seqname) + + def test_named_sequences_full(self): + # Check all the named sequences + url = ("http://www.unicode.org/Public/%s/ucd/NamedSequences.txt" % + unicodedata.unidata_version) + try: + testdata = support.open_urlresource(url, encoding="utf-8", + check=check_version) + except (IOError, HTTPException): + self.skipTest("Could not retrieve " + url) + self.addCleanup(testdata.close) + for line in testdata: + line = line.strip() + if not line or line.startswith('#'): + continue + seqname, codepoints = line.split(';') + codepoints = ''.join(chr(int(cp, 16)) for cp in codepoints.split()) + self.assertEqual(unicodedata.lookup(seqname), codepoints) + with self.assertRaises(SyntaxError): + self.checkletter(seqname, None) + with self.assertRaises(KeyError): + unicodedata.ucd_3_2_0.lookup(seqname) + def test_errors(self): - import unicodedata self.assertRaises(TypeError, unicodedata.name) self.assertRaises(TypeError, unicodedata.name, 'xx') self.assertRaises(TypeError, unicodedata.lookup) |