diff options
Diffstat (limited to 'Lib/test/test_htmlparser.py')
-rwxr-xr-x | Lib/test/test_htmlparser.py | 37 |
1 files changed, 23 insertions, 14 deletions
diff --git a/Lib/test/test_htmlparser.py b/Lib/test/test_htmlparser.py index bb6e0b0..4e8e73c 100755 --- a/Lib/test/test_htmlparser.py +++ b/Lib/test/test_htmlparser.py @@ -60,6 +60,9 @@ class EventCollector(HTMLParser.HTMLParser): def handle_pi(self, data): self.append(("pi", data)) + def unknown_decl(self, decl): + self.append(("unknown decl", decl)) + class EventCollectorExtra(EventCollector): @@ -70,24 +73,16 @@ class EventCollectorExtra(EventCollector): class TestCaseBase(unittest.TestCase): - # Constant pieces of source and events - prologue = "" - epilogue = "" - initial_events = [] - final_events = [] - - def _run_check(self, source, events, collector=EventCollector): + def _run_check(self, source, expected_events, collector=EventCollector): parser = collector() - parser.feed(self.prologue) for s in source: parser.feed(s) - for c in self.epilogue: - parser.feed(c) parser.close() events = parser.get_events() - self.assertEqual(events, - self.initial_events + events + self.final_events, - "got events:\n" + pprint.pformat(events)) + if events != expected_events: + self.fail("received events did not match expected events\n" + "Expected:\n" + pprint.pformat(expected_events) + + "\nReceived:\n" + pprint.pformat(events)) def _run_check_extra(self, source, events): self._run_check(source, events, EventCollectorExtra) @@ -144,7 +139,13 @@ text DOCTYPE html [ <!ELEMENT html - O EMPTY> <!ATTLIST html - version CDATA #IMPLIED '4.0'> + version CDATA #IMPLIED + profile CDATA 'DublinCore'> + <!NOTATION datatype SYSTEM 'http://xml.python.org/notations/python-module'> + <!ENTITY myEntity 'internal parsed entity'> + <!ENTITY anEntity SYSTEM 'http://xml.python.org/entities/something.xml'> + <!ENTITY % paramEntity 'name|name|name'> + %paramEntity; <!-- comment --> ]""" self._run_check("<!%s>" % inside, [ @@ -201,6 +202,14 @@ DOCTYPE html [ ("starttag", "a", [("a.b", "v"), ("c:d", "v"), ("e-f", "v")]), ]) + def test_illegal_declarations(self): + s = 'abc<!spacer type="block" height="25">def' + self._run_check(s, [ + ("data", "abc"), + ("unknown decl", 'spacer type="block" height="25"'), + ("data", "def"), + ]) + def test_starttag_end_boundary(self): self._run_check("""<a b='<'>""", [("starttag", "a", [("b", "<")])]) self._run_check("""<a b='>'>""", [("starttag", "a", [("b", ">")])]) |