1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
|
import formatter
import htmllib
import unittest
from test import support
class AnchorCollector(htmllib.HTMLParser):
    """HTMLParser subclass that remembers the arguments of each anchor_bgn() call."""

    def __init__(self, *args, **kw):
        # The storage list is created first, then all arguments are handed
        # unchanged to the base-class initializer.
        self.__anchors = []
        htmllib.HTMLParser.__init__(self, *args, **kw)

    def anchor_bgn(self, *args):
        """Store the positional arguments of this call as a single tuple."""
        self.__anchors.append(args)

    def get_anchor_info(self):
        """Return the list of argument tuples recorded by anchor_bgn()."""
        return self.__anchors
class DeclCollector(htmllib.HTMLParser):
    """HTMLParser subclass that remembers the data of each unknown_decl() call."""

    def __init__(self, *args, **kw):
        # The storage list is created first, then all arguments are handed
        # unchanged to the base-class initializer.
        self.__decls = []
        htmllib.HTMLParser.__init__(self, *args, **kw)

    def unknown_decl(self, data):
        """Store the declaration text passed in by the parser."""
        self.__decls.append(data)

    def get_decl_info(self):
        """Return the list of declaration strings recorded by unknown_decl()."""
        return self.__decls
class HTMLParserTestCase(unittest.TestCase):
def test_anchor_collection(self):
# See SF bug #467059.
parser = AnchorCollector(formatter.NullFormatter(), verbose=1)
parser.feed(
"""<a href='http://foo.org/' name='splat'> </a>
<a href='http://www.python.org/'> </a>
<a name='frob'> </a>
""")
parser.close()
self.assertEquals(parser.get_anchor_info(),
[('http://foo.org/', 'splat', ''),
('http://www.python.org/', '', ''),
('', 'frob', ''),
])
def test_decl_collection(self):
# See SF patch #545300
parser = DeclCollector(formatter.NullFormatter(), verbose=1)
parser.feed(
"""<html>
<body>
hallo
<![if !supportEmptyParas]> <![endif]>
</body>
</html>
""")
parser.close()
self.assertEquals(parser.get_decl_info(),
["if !supportEmptyParas",
"endif"
])
def test_main():
    """Run HTMLParserTestCase through support.run_unittest()."""
    case_classes = (HTMLParserTestCase,)
    support.run_unittest(*case_classes)
# Run the tests only when this file is executed directly as a script.
if __name__ == "__main__":
    test_main()
|