1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
|
import unittest
from test.test_tools import toolsdir, imports_under_tool
from test import support
from test.support.hypothesis_helper import hypothesis
# Short module-level aliases for the hypothesis names used by the tests below.
st = hypothesis.strategies
given = hypothesis.given
example = hypothesis.example
# The dawg module is imported inside the imports_under_tool("unicode")
# context; presumably that helper makes the "unicode" tool directory
# importable for the duration of the block -- TODO confirm in test.test_tools.
with imports_under_tool("unicode"):
    from dawg import Dawg, build_compression_dawg, lookup, inverse_lookup
@st.composite
def char_name_db(draw, min_length=1, max_length=30):
    """Hypothesis strategy producing a small character-name database.

    A size is drawn between *min_length* and *max_length* (inclusive); then
    a set of that many distinct names (1 to 10 characters over the alphabet
    "abcd") and a set of that many distinct characters are drawn.  The two
    sets are zipped into a list of ``(name, character)`` tuples.
    """
    size = draw(st.integers(min_value=min_length, max_value=max_length))

    # Both sets are forced to exactly `size` elements so that zip() below
    # pairs every name with a character and drops nothing.
    name_sets = st.sets(
        st.text("abcd", min_size=1, max_size=10),
        min_size=size,
        max_size=size,
    )
    names = draw(name_sets)

    character_sets = st.sets(st.characters(), min_size=size, max_size=size)
    characters = draw(character_sets)

    pairs = zip(names, characters)
    return list(pairs)
class TestDawg(unittest.TestCase):
    """Tests for the directed acyclic word graph (DAWG) data structure that
    stores the unicode character names in unicodedata.  Ported from PyPy.
    """

    def test_dawg_direct_simple(self):
        """Build a small DAWG by hand and check lookups in both directions."""
        entries = [
            ("a", -4),
            ("c", -2),
            ("cat", -1),
            ("catarr", 0),
            ("catnip", 1),
            ("zcatnip", 5),
        ]

        dawg = Dawg()
        for word, value in entries:
            dawg.insert(word, value)
        packed, data, inverse = dawg.finish()

        # Forward direction: names are inserted as str but looked up as bytes.
        for word, value in entries:
            self.assertEqual(lookup(packed, data, word.encode("ascii")), value)

        # Keys that were never inserted, including a strict prefix
        # ("catni") and an extension ("catnipp") of an inserted key.
        for missing in (b"b", b"catni", b"catnipp"):
            with self.assertRaises(KeyError):
                lookup(packed, data, missing)

        # Inverse direction: stored value -> name as bytes.
        for word, value in entries:
            self.assertEqual(
                inverse_lookup(packed, inverse, value), word.encode("ascii")
            )

        # A value that was never stored.
        with self.assertRaises(KeyError):
            inverse_lookup(packed, inverse, 12)

    def test_forbid_empty_dawg(self):
        """finish() on a DAWG without any entries raises ValueError."""
        empty = Dawg()
        with self.assertRaises(ValueError):
            empty.finish()

    @given(char_name_db())
    @example([("abc", "a"), ("abd", "b")])
    @example(
        [
            ("bab", "1"),
            ("a", ":"),
            ("ad", "@"),
            ("b", "<"),
            ("aacc", "?"),
            ("dab", "D"),
            ("aa", "0"),
            ("ab", "F"),
            ("aaa", "7"),
            ("cbd", "="),
            ("abad", ";"),
            ("ac", "B"),
            ("abb", "4"),
            ("bb", "2"),
            ("aab", "9"),
            ("caaaaba", "E"),
            ("ca", ">"),
            ("bbaaa", "5"),
            ("d", "3"),
            ("baac", "8"),
            ("c", "6"),
            ("ba", "A"),
        ]
    )
    @example(
        [
            ("bcdac", "9"),
            ("acc", "g"),
            ("d", "d"),
            ("daabdda", "0"),
            ("aba", ";"),
            ("c", "6"),
            ("aa", "7"),
            ("abbd", "c"),
            ("badbd", "?"),
            ("bbd", "f"),
            ("cc", "@"),
            ("bb", "8"),
            ("daca", ">"),
            ("ba", ":"),
            ("baac", "3"),
            ("dbdddac", "a"),
            ("a", "2"),
            ("cabd", "b"),
            ("b", "="),
            ("abd", "4"),
            ("adcbd", "5"),
            ("abc", "e"),
            ("ab", "1"),
        ]
    )
    def test_dawg(self, data):
        """Build the compression DAWG for generated and pinned name lists."""
        # Capture stdout so the builder's debug prints stay out of the
        # test output.
        with support.captured_stdout():
            # Building is all that is needed here: the build step also
            # checks its own result.
            build_compression_dawg(data)
|