diff options
author | Ezio Melotti <ezio.melotti@gmail.com> | 2012-02-10 08:45:44 (GMT) |
---|---|---|
committer | Ezio Melotti <ezio.melotti@gmail.com> | 2012-02-10 08:45:44 (GMT) |
commit | fa3702dc28fa8aef291785c560832c9af60305a8 (patch) | |
tree | 3f83918e3a492d96c097ea85df7fda26559ab43d /Lib/test/test_htmlparser.py | |
parent | 5b14d732d8790a6a19cc8aa410740575ff94c85a (diff) | |
download | cpython-fa3702dc28fa8aef291785c560832c9af60305a8.zip cpython-fa3702dc28fa8aef291785c560832c9af60305a8.tar.gz cpython-fa3702dc28fa8aef291785c560832c9af60305a8.tar.bz2 |
#13960: HTMLParser is now able to handle broken comments when strict=False.
Diffstat (limited to 'Lib/test/test_htmlparser.py')
-rw-r--r-- | Lib/test/test_htmlparser.py | 30 |
1 files changed, 30 insertions, 0 deletions
```diff
diff --git a/Lib/test/test_htmlparser.py b/Lib/test/test_htmlparser.py
index 8c2e25e..7af9131 100644
--- a/Lib/test/test_htmlparser.py
+++ b/Lib/test/test_htmlparser.py
@@ -323,6 +323,23 @@ DOCTYPE html [
                              ("endtag", element_lower)],
                             collector=Collector())
 
+    def test_comments(self):
+        html = ("<!-- I'm a valid comment -->"
+                '<!--me too!-->'
+                '<!------>'
+                '<!---->'
+                '<!----I have many hyphens---->'
+                '<!-- I have a > in the middle -->'
+                '<!-- and I have -- in the middle! -->')
+        expected = [('comment', " I'm a valid comment "),
+                    ('comment', 'me too!'),
+                    ('comment', '--'),
+                    ('comment', ''),
+                    ('comment', '--I have many hyphens--'),
+                    ('comment', ' I have a > in the middle '),
+                    ('comment', ' and I have -- in the middle! ')]
+        self._run_check(html, expected)
+
     def test_condcoms(self):
         html = ('<!--[if IE & !(lte IE 8)]>aren\'t<![endif]-->'
                 '<!--[if IE 8]>condcoms<![endif]-->'
@@ -426,6 +443,19 @@ class HTMLParserTolerantTestCase(HTMLParserStrictTestCase):
         # see #12888
         self.assertEqual(p.unescape('{ ' * 1050), '{ ' * 1050)
 
+    def test_broken_comments(self):
+        html = ('<! not really a comment >'
+                '<! not a comment either -->'
+                '<! -- close enough -->'
+                '<!!! another bogus comment !!!>')
+        expected = [
+            ('comment', ' not really a comment '),
+            ('comment', ' not a comment either --'),
+            ('comment', ' -- close enough --'),
+            ('comment', '!! another bogus comment !!!'),
+        ]
+        self._run_check(html, expected)
+
     def test_broken_condcoms(self):
         # these condcoms are missing the '--' after '<!' and before the '>'
         html = ('<![if !(IE)]>broken condcom<![endif]>'
```