diff options
author | Ezio Melotti <ezio.melotti@gmail.com> | 2012-02-13 14:10:44 (GMT) |
---|---|---|
committer | Ezio Melotti <ezio.melotti@gmail.com> | 2012-02-13 14:10:44 (GMT) |
commit | 4b92cc3f7924e455b7e41cf1a66034a44ede0cc0 (patch) | |
tree | 46a2c5992e840899d901932bb2559086d500fcc3 /Lib/test | |
parent | 32b6371460e9b3b2708be2e732599f3c9a1fe336 (diff) | |
download | cpython-4b92cc3f7924e455b7e41cf1a66034a44ede0cc0.zip cpython-4b92cc3f7924e455b7e41cf1a66034a44ede0cc0.tar.gz cpython-4b92cc3f7924e455b7e41cf1a66034a44ede0cc0.tar.bz2 |
#13960: HTMLParser is now able to handle broken comments.
Diffstat (limited to 'Lib/test')
-rw-r--r-- | Lib/test/test_htmlparser.py | 58 |
1 file changed, 37 insertions, 21 deletions
diff --git a/Lib/test/test_htmlparser.py b/Lib/test/test_htmlparser.py index 14ed80c..29a721c 100644 --- a/Lib/test/test_htmlparser.py +++ b/Lib/test/test_htmlparser.py @@ -114,7 +114,7 @@ comment1b--> <Img sRc='Bar' isMAP>sample text “ -<!--comment2a-- --comment2b--><!> +<!--comment2a-- --comment2b--> </Html> """, [ ("data", "\n"), @@ -142,24 +142,6 @@ text ("data", " foo"), ]) - def test_doctype_decl(self): - inside = """\ -DOCTYPE html [ - <!ELEMENT html - O EMPTY> - <!ATTLIST html - version CDATA #IMPLIED - profile CDATA 'DublinCore'> - <!NOTATION datatype SYSTEM 'http://xml.python.org/notations/python-module'> - <!ENTITY myEntity 'internal parsed entity'> - <!ENTITY anEntity SYSTEM 'http://xml.python.org/entities/something.xml'> - <!ENTITY % paramEntity 'name|name|name'> - %paramEntity; - <!-- comment --> -]""" - self._run_check("<!%s>" % inside, [ - ("decl", inside), - ]) - def test_bad_nesting(self): # Strangely, this *is* supposed to test that overlapping # elements are allowed. 
HTMLParser is more geared toward @@ -182,7 +164,8 @@ DOCTYPE html [ ]) def test_illegal_declarations(self): - self._parse_error('<!spacer type="block" height="25">') + self._run_check('<!spacer type="block" height="25">', + [('comment', 'spacer type="block" height="25"')]) def test_starttag_end_boundary(self): self._run_check("""<a b='<'>""", [("starttag", "a", [("b", "<")])]) @@ -233,7 +216,7 @@ DOCTYPE html [ self._parse_error("<a foo='>") def test_declaration_junk_chars(self): - self._parse_error("<!DOCTYPE foo $ >") + self._run_check("<!DOCTYPE foo $ >", [('decl', 'DOCTYPE foo $ ')]) def test_startendtag(self): self._run_check("<p/>", [ @@ -449,6 +432,39 @@ class AttributesTestCase(TestCaseBase): [("href", "http://www.example.org/\">;")]), ("data", "spam"), ("endtag", "a")]) + def test_comments(self): + html = ("<!-- I'm a valid comment -->" + '<!--me too!-->' + '<!------>' + '<!---->' + '<!----I have many hyphens---->' + '<!-- I have a > in the middle -->' + '<!-- and I have -- in the middle! -->') + expected = [('comment', " I'm a valid comment "), + ('comment', 'me too!'), + ('comment', '--'), + ('comment', ''), + ('comment', '--I have many hyphens--'), + ('comment', ' I have a > in the middle '), + ('comment', ' and I have -- in the middle! ')] + self._run_check(html, expected) + + def test_broken_comments(self): + html = ('<! not really a comment >' + '<! not a comment either -->' + '<! -- close enough -->' + '<!><!<-- this was an empty comment>' + '<!!! another bogus comment !!!>') + expected = [ + ('comment', ' not really a comment '), + ('comment', ' not a comment either --'), + ('comment', ' -- close enough --'), + ('comment', ''), + ('comment', '<-- this was an empty comment'), + ('comment', '!! another bogus comment !!!'), + ] + self._run_check(html, expected) + def test_condcoms(self): html = ('<!--[if IE & !(lte IE 8)]>aren\'t<![endif]-->' '<!--[if IE 8]>condcoms<![endif]-->' |