diff options
-rw-r--r-- | Lib/test/test_htmlparser.py | 121 |
1 files changed, 62 insertions, 59 deletions
diff --git a/Lib/test/test_htmlparser.py b/Lib/test/test_htmlparser.py index b587ab8..ff3ada0 100644 --- a/Lib/test/test_htmlparser.py +++ b/Lib/test/test_htmlparser.py @@ -196,60 +196,6 @@ DOCTYPE html [ ("data", "this < text > contains < bare>pointy< brackets"), ]) - def test_attr_syntax(self): - output = [ - ("starttag", "a", [("b", "v"), ("c", "v"), ("d", "v"), ("e", None)]) - ] - self._run_check("""<a b='v' c="v" d=v e>""", output) - self._run_check("""<a b = 'v' c = "v" d = v e>""", output) - self._run_check("""<a\nb\n=\n'v'\nc\n=\n"v"\nd\n=\nv\ne>""", output) - self._run_check("""<a\tb\t=\t'v'\tc\t=\t"v"\td\t=\tv\te>""", output) - - def test_attr_values(self): - self._run_check("""<a b='xxx\n\txxx' c="yyy\t\nyyy" d='\txyz\n'>""", - [("starttag", "a", [("b", "xxx\n\txxx"), - ("c", "yyy\t\nyyy"), - ("d", "\txyz\n")]) - ]) - self._run_check("""<a b='' c="">""", [ - ("starttag", "a", [("b", ""), ("c", "")]), - ]) - # Regression test for SF patch #669683. - self._run_check("<e a=rgb(1,2,3)>", [ - ("starttag", "e", [("a", "rgb(1,2,3)")]), - ]) - # Regression test for SF bug #921657. 
- self._run_check("<a href=mailto:xyz@example.com>", [ - ("starttag", "a", [("href", "mailto:xyz@example.com")]), - ]) - - def test_attr_nonascii(self): - # see issue 7311 - self._run_check("<img src=/foo/bar.png alt=\u4e2d\u6587>", [ - ("starttag", "img", [("src", "/foo/bar.png"), - ("alt", "\u4e2d\u6587")]), - ]) - self._run_check("<a title='\u30c6\u30b9\u30c8' " - "href='\u30c6\u30b9\u30c8.html'>", [ - ("starttag", "a", [("title", "\u30c6\u30b9\u30c8"), - ("href", "\u30c6\u30b9\u30c8.html")]), - ]) - self._run_check('<a title="\u30c6\u30b9\u30c8" ' - 'href="\u30c6\u30b9\u30c8.html">', [ - ("starttag", "a", [("title", "\u30c6\u30b9\u30c8"), - ("href", "\u30c6\u30b9\u30c8.html")]), - ]) - - def test_attr_entity_replacement(self): - self._run_check("""<a b='&amp;&gt;&lt;&quot;&apos;'>""", [ - ("starttag", "a", [("b", "&><\"'")]), - ]) - - def test_attr_funky_names(self): - self._run_check("""<a a.b='v' c:d=v e-f=v>""", [ - ("starttag", "a", [("a.b", "v"), ("c:d", "v"), ("e-f", "v")]), - ]) - def test_illegal_declarations(self): self._parse_error('<!spacer type="block" height="25">') @@ -358,10 +304,6 @@ DOCTYPE html [ ("endtag", element_lower)]) - def test_entityrefs_in_attributes(self): - self._run_check("<html foo='&euro;&amp;aa&amp;unsupported;'>", - [("starttag", "html", [("foo", "\u20AC&aa&unsupported;")])]) - class HTMLParserTolerantTestCase(HTMLParserStrictTestCase): @@ -458,8 +400,69 @@ class HTMLParserTolerantTestCase(HTMLParserStrictTestCase): self.assertEqual(p.unescape('&#123; ' * 1050), '{ ' * 1050) +class AttributesStrictTestCase(TestCaseBase): + + def get_collector(self): + return EventCollector(strict=True) + + def test_attr_syntax(self): + output = [ + ("starttag", "a", [("b", "v"), ("c", "v"), ("d", "v"), ("e", None)]) + ] + self._run_check("""<a b='v' c="v" d=v e>""", output) + self._run_check("""<a b = 'v' c = "v" d = v e>""", output) + self._run_check("""<a\nb\n=\n'v'\nc\n=\n"v"\nd\n=\nv\ne>""", output) + self._run_check("""<a\tb\t=\t'v'\tc\t=\t"v"\td\t=\tv\te>""", output) + + def 
test_attr_values(self): + self._run_check("""<a b='xxx\n\txxx' c="yyy\t\nyyy" d='\txyz\n'>""", + [("starttag", "a", [("b", "xxx\n\txxx"), + ("c", "yyy\t\nyyy"), + ("d", "\txyz\n")])]) + self._run_check("""<a b='' c="">""", + [("starttag", "a", [("b", ""), ("c", "")])]) + # Regression test for SF patch #669683. + self._run_check("<e a=rgb(1,2,3)>", + [("starttag", "e", [("a", "rgb(1,2,3)")])]) + # Regression test for SF bug #921657. + self._run_check( + "<a href=mailto:xyz@example.com>", + [("starttag", "a", [("href", "mailto:xyz@example.com")])]) + + def test_attr_nonascii(self): + # see issue 7311 + self._run_check( + "<img src=/foo/bar.png alt=\u4e2d\u6587>", + [("starttag", "img", [("src", "/foo/bar.png"), + ("alt", "\u4e2d\u6587")])]) + self._run_check( + "<a title='\u30c6\u30b9\u30c8' href='\u30c6\u30b9\u30c8.html'>", + [("starttag", "a", [("title", "\u30c6\u30b9\u30c8"), + ("href", "\u30c6\u30b9\u30c8.html")])]) + self._run_check( + '<a title="\u30c6\u30b9\u30c8" href="\u30c6\u30b9\u30c8.html">', + [("starttag", "a", [("title", "\u30c6\u30b9\u30c8"), + ("href", "\u30c6\u30b9\u30c8.html")])]) + + def test_attr_entity_replacement(self): + self._run_check( + "<a b='&amp;&gt;&lt;&quot;&apos;'>", + [("starttag", "a", [("b", "&><\"'")])]) + + def test_attr_funky_names(self): + self._run_check( + "<a a.b='v' c:d=v e-f=v>", + [("starttag", "a", [("a.b", "v"), ("c:d", "v"), ("e-f", "v")])]) + + def test_entityrefs_in_attributes(self): + self._run_check( + "<html foo='&euro;&amp;aa&amp;unsupported;'>", + [("starttag", "html", [("foo", "\u20AC&aa&unsupported;")])]) + + def test_main(): - support.run_unittest(HTMLParserStrictTestCase, HTMLParserTolerantTestCase) + support.run_unittest(HTMLParserStrictTestCase, HTMLParserTolerantTestCase, + AttributesStrictTestCase) if __name__ == "__main__": |