diff options
Diffstat (limited to 'Lib/test/test_htmlparser.py')
| -rw-r--r-- | Lib/test/test_htmlparser.py | 76 | 
1 files changed, 7 insertions, 69 deletions
diff --git a/Lib/test/test_htmlparser.py b/Lib/test/test_htmlparser.py index 2d771a2..de8f3e8 100644 --- a/Lib/test/test_htmlparser.py +++ b/Lib/test/test_htmlparser.py @@ -85,7 +85,7 @@ class EventCollectorCharrefs(EventCollector):  class TestCaseBase(unittest.TestCase):      def get_collector(self): -        raise NotImplementedError +        return EventCollector(convert_charrefs=False)      def _run_check(self, source, expected_events, collector=None):          if collector is None: @@ -105,21 +105,8 @@ class TestCaseBase(unittest.TestCase):          self._run_check(source, events,                          EventCollectorExtra(convert_charrefs=False)) -    def _parse_error(self, source): -        def parse(source=source): -            parser = self.get_collector() -            parser.feed(source) -            parser.close() -        with self.assertRaises(html.parser.HTMLParseError): -            with self.assertWarns(DeprecationWarning): -                parse() - -class HTMLParserStrictTestCase(TestCaseBase): - -    def get_collector(self): -        with support.check_warnings(("", DeprecationWarning), quite=False): -            return EventCollector(strict=True, convert_charrefs=False) +class HTMLParserTestCase(TestCaseBase):      def test_processing_instruction_only(self):          self._run_check("<?processing instruction>", [ @@ -201,9 +188,6 @@ text              ("data", "this < text > contains < bare>pointy< brackets"),              ]) -    def test_illegal_declarations(self): -        self._parse_error('<!spacer type="block" height="25">') -      def test_starttag_end_boundary(self):          self._run_check("""<a b='<'>""", [("starttag", "a", [("b", "<")])])          self._run_check("""<a b='>'>""", [("starttag", "a", [("b", ">")])]) @@ -238,25 +222,6 @@ text          self._run_check(["<!--abc--", ">"], output)          self._run_check(["<!--abc-->", ""], output) -    def test_starttag_junk_chars(self): -        self._parse_error("</>") -        
self._parse_error("</$>") -        self._parse_error("</") -        self._parse_error("</a") -        self._parse_error("<a<a>") -        self._parse_error("</a<a>") -        self._parse_error("<!") -        self._parse_error("<a") -        self._parse_error("<a foo='bar'") -        self._parse_error("<a foo='bar") -        self._parse_error("<a foo='>'") -        self._parse_error("<a foo='>") -        self._parse_error("<a$>") -        self._parse_error("<a$b>") -        self._parse_error("<a$b/>") -        self._parse_error("<a$b  >") -        self._parse_error("<a$b  />") -      def test_valid_doctypes(self):          # from http://www.w3.org/QA/2002/04/valid-dtd-list.html          dtds = ['HTML',  # HTML5 doctype @@ -281,9 +246,6 @@ text              self._run_check("<!DOCTYPE %s>" % dtd,                              [('decl', 'DOCTYPE ' + dtd)]) -    def test_declaration_junk_chars(self): -        self._parse_error("<!DOCTYPE foo $ >") -      def test_startendtag(self):          self._run_check("<p/>", [              ("startendtag", "p", []), @@ -384,7 +346,8 @@ text          self._run_check(html, expected)      def test_convert_charrefs(self): -        collector = lambda: EventCollectorCharrefs(convert_charrefs=True) +        # default value for convert_charrefs is now True +        collector = lambda: EventCollectorCharrefs()          self.assertTrue(collector().convert_charrefs)          charrefs = ['"', '"', '"', '"', '"', '"']          # check charrefs in the middle of the text/attributes @@ -421,23 +384,8 @@ text          self._run_check('no charrefs here', [('data', 'no charrefs here')],                          collector=collector()) - -class HTMLParserTolerantTestCase(HTMLParserStrictTestCase): - -    def get_collector(self): -        return EventCollector(convert_charrefs=False) - -    def test_deprecation_warnings(self): -        with self.assertWarns(DeprecationWarning): -            EventCollector()  # convert_charrefs not passed explicitly -     
   with self.assertWarns(DeprecationWarning): -            EventCollector(strict=True) -        with self.assertWarns(DeprecationWarning): -            EventCollector(strict=False) -        with self.assertRaises(html.parser.HTMLParseError): -            with self.assertWarns(DeprecationWarning): -                EventCollector().error('test') - +    # the remaining tests were for the "tolerant" parser (which is now +    # the default), and check various kind of broken markup      def test_tolerant_parsing(self):          self._run_check('<html <html>te>>xt&a<<bc</a></html>\n'                          '<img src="URL><//img></html</html>', [ @@ -686,11 +634,7 @@ class HTMLParserTolerantTestCase(HTMLParserStrictTestCase):          self._run_check(html, expected) -class AttributesStrictTestCase(TestCaseBase): - -    def get_collector(self): -        with support.check_warnings(("", DeprecationWarning), quite=False): -            return EventCollector(strict=True, convert_charrefs=False) +class AttributesTestCase(TestCaseBase):      def test_attr_syntax(self):          output = [ @@ -747,12 +691,6 @@ class AttributesStrictTestCase(TestCaseBase):              [("starttag", "html", [("foo", "\u20AC&aa&unsupported;")])]) - -class AttributesTolerantTestCase(AttributesStrictTestCase): - -    def get_collector(self): -        return EventCollector(convert_charrefs=False) -      def test_attr_funky_names2(self):          self._run_check(              "<a $><b $=%><c \=/>",  | 
