From c1e73c30e98c9b59460d0f963a391a08156286e5 Mon Sep 17 00:00:00 2001 From: Ezio Melotti Date: Tue, 1 Nov 2011 18:57:15 +0200 Subject: Make sure that the tolerant parser still parses valid HTML correctly. --- Lib/test/test_htmlparser.py | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/Lib/test/test_htmlparser.py b/Lib/test/test_htmlparser.py index c4a8f17..b587ab8 100644 --- a/Lib/test/test_htmlparser.py +++ b/Lib/test/test_htmlparser.py @@ -72,9 +72,12 @@ class EventCollectorExtra(EventCollector): class TestCaseBase(unittest.TestCase): + def get_collector(self): + raise NotImplementedError + def _run_check(self, source, expected_events, collector=None): if collector is None: - collector = EventCollector() + collector = self.get_collector() parser = collector for s in source: parser.feed(s) @@ -96,7 +99,10 @@ class TestCaseBase(unittest.TestCase): self.assertRaises(html.parser.HTMLParseError, parse) -class HTMLParserTestCase(TestCaseBase): +class HTMLParserStrictTestCase(TestCaseBase): + + def get_collector(self): + return EventCollector(strict=True) def test_processing_instruction_only(self): self._run_check("", [ @@ -353,12 +359,11 @@ DOCTYPE html [ def test_entityrefs_in_attributes(self): - self._run_check("", [ - ("starttag", "html", [("foo", "\u20AC&aa&unsupported;")]) - ]) + self._run_check("", + [("starttag", "html", [("foo", "\u20AC&aa&unsupported;")])]) -class HTMLParserTolerantTestCase(TestCaseBase): +class HTMLParserTolerantTestCase(HTMLParserStrictTestCase): def get_collector(self): return EventCollector(strict=False) @@ -374,8 +379,7 @@ class HTMLParserTolerantTestCase(TestCaseBase): ('endtag', 'a'), ('endtag', 'html'), ('data', '\n', [ ('starttag', 'form', [('action', '/xxx.php?a=1&b=2&'), - ('method', 'post')])], - collector=self.get_collector()) + ('method', 'post')])]) def test_weird_chars_in_unquoted_attribute_values(self): self._run_check('
', [ ('starttag', 'form', - [('action', 'bogus|&#()value')])], - collector=self.get_collector()) + [('action', 'bogus|&#()value')])]) def test_correct_detection_of_start_tags(self): # see #13273 @@ -436,7 +437,7 @@ class HTMLParserTolerantTestCase(TestCaseBase): ('endtag', 'b'), ('endtag', 'div') ] - self._run_check(html, expected, collector=self.get_collector()) + self._run_check(html, expected) html = '
The rain' expected = [ @@ -447,7 +448,7 @@ class HTMLParserTolerantTestCase(TestCaseBase): ('data', 'rain'), ('endtag', 'a'), ] - self._run_check(html, expected, collector=self.get_collector()) + self._run_check(html, expected) def test_unescape_function(self): p = html.parser.HTMLParser() @@ -456,8 +457,9 @@ class HTMLParserTolerantTestCase(TestCaseBase): # see #12888 self.assertEqual(p.unescape('{ ' * 1050), '{ ' * 1050) + def test_main(): - support.run_unittest(HTMLParserTestCase, HTMLParserTolerantTestCase) + support.run_unittest(HTMLParserStrictTestCase, HTMLParserTolerantTestCase) if __name__ == "__main__": -- cgit v0.12