summaryrefslogtreecommitdiffstats
path: root/Lib/test/test_tools/test_makeunicodedata.py
blob: eee68676416c45a24c1b0f3ad88e25df7aa44fd0 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
import unittest
from test.test_tools import toolsdir, imports_under_tool
from test import support
from test.support.hypothesis_helper import hypothesis

st = hypothesis.strategies
given = hypothesis.given
example = hypothesis.example


with imports_under_tool("unicode"):
    from dawg import Dawg, build_compression_dawg, lookup, inverse_lookup


@st.composite
def char_name_db(draw, min_length=1, max_length=30):
    m = draw(st.integers(min_value=min_length, max_value=max_length))
    names = draw(
        st.sets(st.text("abcd", min_size=1, max_size=10), min_size=m, max_size=m)
    )
    characters = draw(st.sets(st.characters(), min_size=m, max_size=m))
    return list(zip(names, characters))


class TestDawg(unittest.TestCase):
    """Tests for the directed acyclic word graph data structure that is used
    to store the unicode character names in unicodedata. Tests ported from PyPy
    """

    def test_dawg_direct_simple(self):
        dawg = Dawg()
        dawg.insert("a", -4)
        dawg.insert("c", -2)
        dawg.insert("cat", -1)
        dawg.insert("catarr", 0)
        dawg.insert("catnip", 1)
        dawg.insert("zcatnip", 5)
        packed, data, inverse = dawg.finish()

        self.assertEqual(lookup(packed, data, b"a"), -4)
        self.assertEqual(lookup(packed, data, b"c"), -2)
        self.assertEqual(lookup(packed, data, b"cat"), -1)
        self.assertEqual(lookup(packed, data, b"catarr"), 0)
        self.assertEqual(lookup(packed, data, b"catnip"), 1)
        self.assertEqual(lookup(packed, data, b"zcatnip"), 5)
        self.assertRaises(KeyError, lookup, packed, data, b"b")
        self.assertRaises(KeyError, lookup, packed, data, b"catni")
        self.assertRaises(KeyError, lookup, packed, data, b"catnipp")

        self.assertEqual(inverse_lookup(packed, inverse, -4), b"a")
        self.assertEqual(inverse_lookup(packed, inverse, -2), b"c")
        self.assertEqual(inverse_lookup(packed, inverse, -1), b"cat")
        self.assertEqual(inverse_lookup(packed, inverse, 0), b"catarr")
        self.assertEqual(inverse_lookup(packed, inverse, 1), b"catnip")
        self.assertEqual(inverse_lookup(packed, inverse, 5), b"zcatnip")
        self.assertRaises(KeyError, inverse_lookup, packed, inverse, 12)

    def test_forbid_empty_dawg(self):
        dawg = Dawg()
        self.assertRaises(ValueError, dawg.finish)

    @given(char_name_db())
    @example([("abc", "a"), ("abd", "b")])
    @example(
        [
            ("bab", "1"),
            ("a", ":"),
            ("ad", "@"),
            ("b", "<"),
            ("aacc", "?"),
            ("dab", "D"),
            ("aa", "0"),
            ("ab", "F"),
            ("aaa", "7"),
            ("cbd", "="),
            ("abad", ";"),
            ("ac", "B"),
            ("abb", "4"),
            ("bb", "2"),
            ("aab", "9"),
            ("caaaaba", "E"),
            ("ca", ">"),
            ("bbaaa", "5"),
            ("d", "3"),
            ("baac", "8"),
            ("c", "6"),
            ("ba", "A"),
        ]
    )
    @example(
        [
            ("bcdac", "9"),
            ("acc", "g"),
            ("d", "d"),
            ("daabdda", "0"),
            ("aba", ";"),
            ("c", "6"),
            ("aa", "7"),
            ("abbd", "c"),
            ("badbd", "?"),
            ("bbd", "f"),
            ("cc", "@"),
            ("bb", "8"),
            ("daca", ">"),
            ("ba", ":"),
            ("baac", "3"),
            ("dbdddac", "a"),
            ("a", "2"),
            ("cabd", "b"),
            ("b", "="),
            ("abd", "4"),
            ("adcbd", "5"),
            ("abc", "e"),
            ("ab", "1"),
        ]
    )
    def test_dawg(self, data):
        # suppress debug prints
        with support.captured_stdout() as output:
            # it's enough to build it, building will also check the result
            build_compression_dawg(data)